Code
library(tidyverse)
library(tidymodels)
library(glmnet)
library(discrim)
library(rpart)
library(rpart.plot)
library(baguette)
library(tidyclust)
library(caret)
library(RColorBrewer)library(tidyverse)
library(tidymodels)
library(glmnet)
library(discrim)
library(rpart)
library(rpart.plot)
library(baguette)
library(tidyclust)
library(caret)
library(RColorBrewer)sample_grid <- matrix(c("Bear", "Bee", "Meadow", "Bear", "Meadow", "Meadow", "Bee", "Meadow", "Bee"),3,3,byrow=TRUE)
sample_grid [,1] [,2] [,3]
[1,] "Bear" "Bee" "Meadow"
[2,] "Bear" "Meadow" "Meadow"
[3,] "Bee" "Meadow" "Bee"
sample_grid2 <- matrix(c("Meadow", "Meadow", "Bee", "Meadow", "Bee", "Meadow", "Bee", "Meadow", "Meadow"),3,3,byrow=TRUE)
sample_grid2 [,1] [,2] [,3]
[1,] "Meadow" "Meadow" "Bee"
[2,] "Meadow" "Bee" "Meadow"
[3,] "Bee" "Meadow" "Meadow"
big_grid1 <- matrix(c("Deer", "Meadow", "Bee", "Bear", "Fox", "Wolf", "Meadow", "Meadow", "Trout", "Stream",
"Deer", "Eagle", "Meadow", "Trout", "Stream", "Fox", "Rabbit", "Stream", "Dragonfly",
"Stream"),4,5,byrow=TRUE)
big_grid1 [,1] [,2] [,3] [,4] [,5]
[1,] "Deer" "Meadow" "Bee" "Bear" "Fox"
[2,] "Wolf" "Meadow" "Meadow" "Trout" "Stream"
[3,] "Deer" "Eagle" "Meadow" "Trout" "Stream"
[4,] "Fox" "Rabbit" "Stream" "Dragonfly" "Stream"
twenty_seven_x <- rep("x", 27)
super_vec <- append(twenty_seven_x,
c("x", "x",
"Deer", "Meadow", "Bee", "Bear", "Fox",
"x", "x",
"x", "x",
"Wolf", "Meadow", "Meadow", "Trout", "Stream",
"x", "x",
"x", "x",
"Deer", "Eagle", "Meadow", "Trout", "Stream",
"x", "x",
"x", "x",
"Fox", "Rabbit", "Stream", "Dragonfly","Stream",
"x", "x"))
super_grid1 <- matrix(super_vec,7,9,byrow=TRUE)
super_grid1 [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
[1,] "x" "x" "x" "x" "x" "x" "x" "x" "x"
[2,] "x" "x" "x" "x" "x" "x" "x" "x" "x"
[3,] "x" "x" "x" "x" "x" "x" "x" "x" "x"
[4,] "x" "x" "Deer" "Meadow" "Bee" "Bear" "Fox" "x" "x"
[5,] "x" "x" "Wolf" "Meadow" "Meadow" "Trout" "Stream" "x" "x"
[6,] "x" "x" "Deer" "Eagle" "Meadow" "Trout" "Stream" "x" "x"
[7,] "x" "x" "Fox" "Rabbit" "Stream" "Dragonfly" "Stream" "x" "x"
cards <- c(rep("Bear", 12),
rep("Bee", 8),
rep("Meadow", 20),
rep("Trout", 10),
rep("Eagle", 8),
rep("Rabbit", 8),
rep("Dragonfly", 8),
rep("Fox", 12),
rep("Deer", 12),
rep("Stream", 20),
rep("Wolf", 12)
)
generate_grid <- function(pool, partial_grid = NULL){
if(is.null(partial_grid) == FALSE){
blanks <- which(partial_grid == "x", TRUE)
n = 20 - nrow(blanks)
}else{
n = 20
}
sample <- sample(pool, n)
if(is.null(partial_grid) == FALSE){
board <- partial_grid
for (i in 1:nrow(blanks)){
loc <- c(blanks[[i, 1]], blanks[[i, 2]])
board[blanks[[i, 1]], blanks[[i, 2]]] = sample[i]
}
}else{
board <- matrix(sample, nrow=4, ncol=5, byrow=TRUE)
}
return(board)
}
generate_grid(cards) [,1] [,2] [,3] [,4] [,5]
[1,] "Eagle" "Bear" "Dragonfly" "Stream" "Bear"
[2,] "Eagle" "Stream" "Meadow" "Wolf" "Wolf"
[3,] "Bee" "Eagle" "Eagle" "Fox" "Bear"
[4,] "Wolf" "Deer" "Eagle" "Stream" "Deer"
find_cardinals <- function(i, j, grid){
cardinals <- list()
maxrow = nrow(grid)
maxcol = ncol(grid)
if(i+1 <= maxrow){
cardinals[[length(cardinals)+1]] <- c(i+1,j)
}
if(i-1 > 0){
cardinals[[length(cardinals)+1]] <- c(i-1,j)
}
if(j+1 <= maxcol){
cardinals[[length(cardinals)+1]] <- c(i,j+1)
}
if(j-1 > 0){
cardinals[[length(cardinals)+1]] <- c(i,j-1)
}
return(cardinals)
}find_two_spaces <- function(i, j, grid){
two_space <- list()
maxrow = nrow(grid)
maxcol = ncol(grid)
if(i+1 <= maxrow){
two_space[[length(two_space)+1]] <- c(i+1,j)
}
if(i+2 <= maxrow){
two_space[[length(two_space)+1]] <- c(i+2,j)
}
if(i-1 > 0){
two_space[[length(two_space)+1]] <- c(i-1,j)
}
if(i-2 > 0){
two_space[[length(two_space)+1]] <- c(i-2,j)
}
if(j+1 <= maxcol){
two_space[[length(two_space)+1]] <- c(i,j+1)
}
if(j+2 <= maxcol){
two_space[[length(two_space)+1]] <- c(i,j+2)
}
if(j-1 > 0){
two_space[[length(two_space)+1]] <- c(i,j-1)
}
if(j-2 > 0){
two_space[[length(two_space)+1]] <- c(i,j-2)
}
if(i+1 <= maxrow && j+1 <= maxcol){
two_space[[length(two_space)+1]] <- c(i+1,j+1)
}
if(i+1 <= maxrow && j-1 > 0){
two_space[[length(two_space)+1]] <- c(i+1,j-1)
}
if(i-1 > 0 && j+1 <= maxcol){
two_space[[length(two_space)+1]] <- c(i-1,j+1)
}
if(i-1 > 0 && j-1 > 0){
two_space[[length(two_space)+1]] <- c(i-1,j-1)
}
return(two_space)
}find_more_meadows <- function(i, j, grid, meadow_list){
meadow_list[[length(meadow_list)+1]] = as.double(c(i,j))
current_caridnals <- find_cardinals(i, j, grid)
for (k in current_caridnals){
if (grid[k[1],k[2]] == "Meadow"){
if ((list(k) %in% meadow_list) == FALSE){
meadow_list = find_more_meadows(as.double(k[1]),as.double(k[2]), grid, meadow_list)
}
}
}
return(meadow_list)
}find_more_streams <- function(i, j, grid, stream_list){
stream_list[[length(stream_list)+1]] = as.double(c(i,j))
current_caridnals <- find_cardinals(i, j, grid)
for (k in current_caridnals){
if (grid[k[1],k[2]] == "Stream"){
if ((list(k) %in% stream_list) == FALSE){
stream_list = find_more_streams(as.double(k[1]),as.double(k[2]), grid, stream_list)
}
}
}
return(stream_list)
}score_grid <- function(grid, individual=FALSE){
score = 0
meadow_patch = list()
first_meadow = TRUE
stream_patch = list()
first_stream = TRUE
dragonfly_list = list()
deer_row <- c()
deer_col <- c()
# first_wolf = TRUE
num_wolves = 0
bear_score = 0
bee_score = 0
meadow_score = 0
trout_score = 0
eagle_score = 0
rabbit_score = 0
dragonfly_score = 0
fox_score = 0
deer_score = 0
stream_score = 0
wolf_score = 0
diversity_score = 0
for (i in 1:nrow(grid)) {
for (j in 1:ncol(grid)) {
current_caridnals <- find_cardinals(i, j, grid)
if(grid[i,j] == "Bear"){
for (k in current_caridnals){
if (grid[k[1],k[2]] == "Bee" || grid[k[1],k[2]] == "Trout"){
score = score + 2
bear_score = bear_score + 2
}
}
}
if(grid[i,j] == "Bee"){
for (k in current_caridnals){
if (grid[k[1],k[2]] == "Meadow"){
score = score + 3
bee_score = bee_score + 3
}
}
}
if(grid[i,j] == "Meadow"){
if (first_meadow == TRUE){
first_meadow = FALSE
first_patch = list()
completed_patch = find_more_meadows(as.double(i), as.double(j), grid, first_patch)
meadow_patch[[length(meadow_patch)+1]] = completed_patch
}else{
exist = FALSE
for (x in 1:length(meadow_patch)){
if (list(as.double(c(i,j))) %in% meadow_patch[[x]]){
exist = TRUE
}
}
if (exist == FALSE){
new_patch = list()
completed_patch = find_more_meadows(as.double(i), as.double(j), grid, new_patch)
meadow_patch[[length(meadow_patch)+1]] = completed_patch
}
}
}
if(grid[i,j] == "Trout"){
for (k in current_caridnals){
if (grid[k[1],k[2]] == "Dragonfly" || grid[k[1],k[2]] == "Stream"){
score = score + 2
trout_score = trout_score + 2
}
}
}
if(grid[i,j] == "Eagle"){
two_space <- find_two_spaces(i, j, grid)
for (k in two_space){
if (grid[k[1],k[2]] == "Trout" || grid[k[1],k[2]] == "Rabbit"){
score = score + 2
eagle_score = eagle_score + 2
}
}
}
if(grid[i,j] == "Rabbit"){
score = score + 1
rabbit_score = rabbit_score + 1
}
if(grid[i,j] == "Dragonfly"){
dragonfly_list[[length(dragonfly_list)+1]] = as.double(c(i,j))
}
if(grid[i,j] == "Fox"){
score_it = TRUE
for (k in current_caridnals){
if (grid[k[1],k[2]] == "Bear" || grid[k[1],k[2]] == "Wolf"){
score_it = FALSE
}
}
if (score_it){
score = score + 3
fox_score = fox_score + 3
}
}
if(grid[i,j] == "Deer"){
deer_row <- append(deer_row, i)
deer_col <- append(deer_col, j)
}
if(grid[i,j] == "Stream"){
if (first_stream == TRUE){
first_stream = FALSE
first_patch = list()
completed_patch = find_more_streams(as.double(i), as.double(j), grid, first_patch)
stream_patch[[length(stream_patch)+1]] = completed_patch
}else{
exist = FALSE
for (x in 1:length(stream_patch)){
if (list(as.double(c(i,j))) %in% stream_patch[[x]]){
exist = TRUE
}
}
if (exist == FALSE){
new_patch = list()
completed_patch = find_more_streams(as.double(i), as.double(j), grid, new_patch)
stream_patch[[length(stream_patch)+1]] = completed_patch
}
}
}
if(grid[i,j] == "Wolf"){
# temporary stand in, can only be scored properly with more than 1 player
# if (first_wolf == TRUE){
# score = score + 8
# wolf_score = wolf_score + 8
# first_wolf == FALSE
# }
num_wolves = num_wolves + 1
}
}
}
for (i in meadow_patch){
if (length(i) == 2){
score = score + 3
meadow_score = meadow_score + 3
}else if (length(i) == 3){
score = score + 6
meadow_score = meadow_score + 6
}else if (length(i) == 4){
score = score + 10
meadow_score = meadow_score + 10
}else if (length(i) >= 5){
score = score + 15
meadow_score = meadow_score + 15
}
}
largest_stream = 0
for (i in stream_patch){
if (length(i) > largest_stream){
largest_stream = length(i)
}
}
# temporary scoring for largest stream, can only be scored properly with more than 1 player
# if (largest_stream > 0){
# score = score + 5
# stream_score = stream_score + 5
# }
for (d in dragonfly_list){
current_caridnals <- find_cardinals(d[1], d[2], grid)
largest_score = 0
for (k in current_caridnals){
if (grid[k[1],k[2]] == "Stream"){
for (s in stream_patch){
if((list(k) %in% s) == TRUE){
current_score = 2 * length(s)
if (current_score > largest_score){
largest_score = current_score
}
}
}
}
}
score = score + largest_score
dragonfly_score = dragonfly_score + largest_score
}
score = score + 2*length(unique(deer_row))
score = score + 2*length(unique(deer_col))
deer_score = deer_score + 2*length(unique(deer_row))
deer_score = deer_score + 2*length(unique(deer_col))
diversity_matrix <- matrix(c(bear_score, bee_score, meadow_score, trout_score, eagle_score, rabbit_score,
dragonfly_score, fox_score, deer_score))
diversity_vector <- c(bear_score, bee_score, meadow_score, trout_score, eagle_score, rabbit_score,
dragonfly_score, fox_score, deer_score)
gaps = colSums(diversity_matrix == 0)[1]
# if(gaps >= 6){
# score = score - 5
# diversity_score = -5
# }else if (gaps == 4){
# score = score + 3
# diversity_score = 3
# }else if (gaps == 3){
# score = score + 7
# diversity_score = 7
# }else if (gaps <= 2){
# score = score + 12
# diversity_score = 12
# }
if(individual==TRUE){
if(largest_stream == 0){
gaps = gaps + 1
}
if(num_wolves == 0){
gaps = gaps + 1
}
if(gaps >= 6){
dv_score = -5
}else if (gaps == 4){
dv_score = 3
}else if (gaps == 3){
dv_score = 7
}else if (gaps <= 2){
dv_score = 12
}else{
dv_score = 0
}
return(c(diversity_vector,largest_stream, num_wolves, dv_score))
}else{
return(c(score, largest_stream, num_wolves, gaps))
}
}solo_score <- function(score_vector){
score = score_vector[1]
stream_size = score_vector[2]
num_wolves = score_vector[3]
num_gaps = score_vector[4]
score = score + stream_size + num_wolves
if(stream_size == 0){
num_gaps = num_gaps + 1
}
if(num_wolves == 0){
num_gaps = num_gaps + 1
}
if(num_gaps >= 6){
score = score - 5
}else if (num_gaps == 4){
score = score + 3
}else if (num_gaps == 3){
score = score + 7
}else if (num_gaps <= 2){
score = score + 12
}
return(score)
}mp_score <- function(score_list){
# each entry in score_list follows the format: c(score, size_of_largest_stream, num_wolves, diversity_gaps)
if(length(score_list) > 2){
more_than_2 = TRUE
}else{
more_than_2 = FALSE
}
score <- rep(0, length(score_list))
stream_size <- rep(0, length(score_list))
num_wolves <- rep(0, length(score_list))
num_gaps <- rep(0, length(score_list))
for (i in 1:length(score_list)){
score[i] = score_list[[i]][1]
stream_size[i] = score_list[[i]][2]
num_wolves[i] = score_list[[i]][3]
num_gaps[i] = score_list[[i]][4] + 2
}
print(score)
stream_size = sort(stream_size, decreasing = TRUE)
num_wolves = sort(num_wolves, decreasing = TRUE)
stream_matrix = matrix(stream_size)
wolf_matrix = matrix(num_wolves)
largest_stream = stream_size[1]
second_stream = stream_size[2]
score_largest_s = TRUE
score_second_s = TRUE
if(colSums(stream_matrix == largest_stream)[1] > 1){
score_second_s = FALSE
}
for (i in 1:length(score_list)){
if (score_list[[i]][2] == largest_stream && largest_stream != 0){
score[i] = score[i] + 8
num_gaps[i] = num_gaps[i] - 1
}
}
if(score_second_s == TRUE){
for (i in 1:length(score_list)){
if (score_list[[i]][2] == second_stream && second_stream != 0){
score[i] = score[i] + 5
num_gaps[i] = num_gaps[i] - 1
}
}
}
print(score)
most_wolves = num_wolves[1]
second_wolves = num_wolves[2]
if(more_than_2){
third_wolves = num_wolves[3]
}
score_most_w = TRUE
score_second_w = TRUE
score_third_w = TRUE
if(colSums(wolf_matrix == most_wolves)[1] > 1){
score_second_w = FALSE
if(colSums(wolf_matrix == most_wolves)[1] > 2){
score_third_w = FALSE
}
}
for (i in 1:length(score_list)){
if (score_list[[i]][3] == most_wolves){
score[i] = score[i] + 12
num_gaps[i] = num_gaps[i] - 1
}
}
if(colSums(wolf_matrix == second_wolves)[1] > 1){
score_third_w = FALSE
}
if(score_second_w == TRUE){
for (i in 1:length(score_list)){
if (score_list[[i]][3] == second_wolves){
score[i] = score[i] + 8
num_gaps[i] = num_gaps[i] - 1
}
}
}
if(score_third_w == TRUE && more_than_2 == TRUE){
for (i in 1:length(score_list)){
if (score_list[[i]][3] == third_wolves){
score[i] = score[i] + 4
num_gaps[i] = num_gaps[i] - 1
}
}
}
print(score)
for(i in 1:length(score_list)){
if(num_gaps[i] >= 6){
score[i] = score[i] - 5
}else if (num_gaps[i] == 4){
score[i] = score[i] + 3
}else if (num_gaps[i] == 3){
score[i] = score[i] + 7
}else if (num_gaps[i] <= 2){
score[i] = score[i] + 12
}
}
return(score)
}baseline_sim <- function(cards, n = 10000){
all_scores <- c()
for (i in 1:n){
sim_grid <- generate_grid(cards)
all_scores <- c(all_scores, solo_score(score_grid(sim_grid)))
}
return(all_scores)
}rw_mcmc <- function(grid, iterations = 1000, acceptance_func = "simple", beta = 0, bp = 500, original = NULL, record_board = FALSE){
start_score <- solo_score(score_grid(grid))
current_grid <- grid
continue <- TRUE
rows <- rep(1:nrow(grid))
cols <- rep(1:ncol(grid))
iter = 0
score_vector <- c()
highest_score <- start_score
highest_grid <- grid
highest_iter <- 0
while(continue){
current_score <- solo_score(score_grid(current_grid))
if(bp == 0){
}else if(iter%%bp == 0){
if(acceptance_func == "annealing dynamic" ||
acceptance_func == "delayed"){
current_grid = highest_grid
current_score = highest_score
}
}
score_vector = c(score_vector, current_score)
if(current_score > highest_score){
highest_score = current_score
highest_grid = current_grid
highest_iter = iter
}
# Choosing swap locations if grid is partially complete
if(is.null(original) == FALSE){
blanks <- which(original == "x", TRUE)
possible <- list()
for (i in 1:nrow(blanks)){
loc <- c(blanks[[i, 1]], blanks[[i, 2]])
possible[[length(possible)+1]] = loc
}
start_loc <- sample(possible, 1)
start_loc <- start_loc[[1]]
start_row <- start_loc[1]
start_col <- start_loc[2]
end_loc <- sample(possible, 1)
end_loc <- end_loc[[1]]
end_row <- end_loc[1]
end_col <- end_loc[2]
while(current_grid[start_row, start_col] == current_grid[end_row, end_col]){
end_loc <- sample(possible, 1)
end_loc <- end_loc[[1]]
end_row <- end_loc[1]
end_col <- end_loc[2]
}
# Choosing swap locations without partial grid
}else{
start_row <- sample(rows, 1)
start_col <- sample(cols, 1)
# Random swap anywhere
end_row <- sample(rows, 1)
end_col <- sample(cols, 1)
while(current_grid[start_row, start_col] == current_grid[end_row, end_col]){
end_row <- sample(rows, 1)
end_col <- sample(cols, 1)
}
}
# Adjacent swap only
# possible_end <- find_cardinals(start_row, start_col, grid)
# end <- sample(possible_end, 1)
# end_row <- end[[1]][1]
# end_col <- end[[1]][2]
proposed_grid <- current_grid
# if(iter == 347 || iter == 668){
# print(proposed_grid)
# }
start <- current_grid[start_row, start_col]
end <- current_grid[end_row, end_col]
proposed_grid[start_row, start_col] = end
proposed_grid[end_row, end_col] = start
proposed_score <- solo_score(score_grid(proposed_grid))
if(acceptance_func == "simple"){
p = proposed_score
c = current_score
if(p <= 0){
p = p + -1*p + 1
c = c + -1*p + 1
}
if(c <= 0){
c = c + -1*c + 1
p = p + -1*c + 1
}
x <- runif(1,0,1)
if(x < p/c){
current_grid <- proposed_grid
}
}
if(acceptance_func == "annealing"){
p = proposed_score
c = current_score
x <- runif(1,0,1)
if(x < exp(beta*p)/exp(beta*c)){
current_grid <- proposed_grid
}
}
if(acceptance_func == "annealing dynamic"){
p = proposed_score
c = current_score
factor = floor(iter/bp) + 1
b = beta*(factor)
x <- runif(1,0,1)
if(x < exp(b*p)/exp(b*c)){
current_grid <- proposed_grid
}
}
if(acceptance_func == "delayed"){
p = proposed_score
c = current_score
if(p <= 0){
p = p + -1*p + 1
c = c + -1*p + 1
}
if(c <= 0){
c = c + -1*c + 1
p = p + -1*c + 1
}
x <- runif(1,0,1)
if(x < p/c && p/c < 1){
p2 = proposed_score
c2 = current_score
factor = floor(iter/500) + 1
b = beta*(factor)
x <- runif(1,0,1)
if(x < (exp(b*p2)*c)/(exp(b*c2)*p)){
current_grid <- proposed_grid
}
}else if(p/c >= 1){
current_grid <- proposed_grid
}
}
iter = iter + 1
if(iter >= iterations){
continue = FALSE
}
}
final_score <- solo_score(score_grid(current_grid))
if(final_score > highest_score){
highest_score = final_score
highest_grid = grid
}
if(record_board == TRUE){
return(c(t(highest_grid), highest_score))
}else{
return(c(highest_score, final_score, start_score, highest_iter, data.frame(score_vector)))
}
}multi_mcmc <- function(iterations, n, acceptance_func = "simple", beta = 0, bp = 500, grid = NULL, boardlist = NULL, record_board = FALSE, cards = NULL, card_name = NULL){
first = TRUE
start_scores <- c()
highest_scores <- c()
highest_iter <- c()
if(is.null(cards) == TRUE){
cards <- c(rep("Bear", 12),
rep("Bee", 8),
rep("Meadow", 20),
rep("Trout", 10),
rep("Eagle", 8),
rep("Rabbit", 8),
rep("Dragonfly", 8),
rep("Fox", 12),
rep("Deer", 12),
rep("Stream", 20),
rep("Wolf", 12)
)
card_name = "default"
}
# Creating proper pool of cards if grid is partially complete
if(is.null(grid) == FALSE){
df <- as.data.frame(table(grid))
animals <- levels(df$grid)
for(i in 1:nrow(grid)){
animal <- animals[i]
num = length(cards[cards == animal])
cards = cards[!cards == animal]
cards <- c(cards, rep(animal, num - df[i,2]))
}
}
for(i in 1:n){
if(is.null(grid) == FALSE){
if(record_board == TRUE){
sim_grid <- generate_grid(cards, grid)
run <- rw_mcmc(sim_grid, iterations, acceptance_func, beta, bp, grid, record_board = TRUE)
}else{
sim_grid <- generate_grid(cards, grid)
run <- rw_mcmc(sim_grid, iterations, acceptance_func, beta, bp, grid)
}
}else if(is.null(boardlist) == FALSE){
if(record_board == TRUE){
run <- rw_mcmc(boardlist[[i]], iterations, acceptance_func, beta, bp, record_board = TRUE)
}else{
run <- rw_mcmc(boardlist[[i]], iterations, acceptance_func, beta, bp)
}
}else{
if(record_board == TRUE){
sim_grid <- generate_grid(cards)
run <- rw_mcmc(sim_grid, iterations, acceptance_func, beta, bp, record_board = TRUE)
}else{
sim_grid <- generate_grid(cards)
run <- rw_mcmc(sim_grid, iterations, acceptance_func, beta, bp)
}
}
if(record_board == TRUE){
if(first == TRUE){
first = FALSE
df <- data.frame(
row1col1 = run[[1]],
row1col2 = run[[2]],
row1col3 = run[[3]],
row1col4 = run[[4]],
row1col5 = run[[5]],
row2col1 = run[[6]],
row2col2 = run[[7]],
row2col3 = run[[8]],
row2col4 = run[[9]],
row2col5 = run[[10]],
row3col1 = run[[11]],
row3col2 = run[[12]],
row3col3 = run[[13]],
row3col4 = run[[14]],
row3col5 = run[[15]],
row4col1 = run[[16]],
row4col2 = run[[17]],
row4col3 = run[[18]],
row4col4 = run[[19]],
row4col5 = run[[20]],
score = run[[21]],
pool = card_name
)
}else{
row <- data.frame(
row1col1 = run[[1]],
row1col2 = run[[2]],
row1col3 = run[[3]],
row1col4 = run[[4]],
row1col5 = run[[5]],
row2col1 = run[[6]],
row2col2 = run[[7]],
row2col3 = run[[8]],
row2col4 = run[[9]],
row2col5 = run[[10]],
row3col1 = run[[11]],
row3col2 = run[[12]],
row3col3 = run[[13]],
row3col4 = run[[14]],
row3col5 = run[[15]],
row4col1 = run[[16]],
row4col2 = run[[17]],
row4col3 = run[[18]],
row4col4 = run[[19]],
row4col5 = run[[20]],
score = run[[21]],
pool = card_name
)
df <- rbind(df, row)
}
}else{
start_scores <- c(start_scores, run[[3]])
highest_scores <- c(highest_scores, run[[1]])
highest_iter <- c(highest_iter, run[[4]])
}
}
if(record_board == TRUE){
return(df)
}else{
return(list(start_scores, highest_scores, highest_iter))
}
}set.seed(4)
x1 <- multi_mcmc(2000, 100, "simple")mean(x1[[1]])
sd(x1[[1]])
var(x1[[1]])
max(x1[[1]])
min(x1[[1]])
summary(x1[[1]])mean(x1[[2]])
sd(x1[[2]])
var(x1[[2]])
max(x1[[2]])
min(x1[[2]])
summary(x1[[2]])
summary(x1[[3]])set.seed(4)
x2 <- multi_mcmc(2000, 100, "annealing", 0.8, 250)mean(x2[[1]])
sd(x2[[1]])
var(x2[[1]])
max(x2[[1]])
min(x2[[1]])
summary(x2[[1]])mean(x2[[2]])
sd(x2[[2]])
var(x2[[2]])
max(x2[[2]])
min(x2[[2]])
summary(x2[[2]])
summary(x2[[3]])set.seed(4)
x3 <- multi_mcmc(2000, 100, "annealing dynamic", 0.08, 250)mean(x3[[1]])
sd(x3[[1]])
var(x3[[1]])
max(x3[[1]])
min(x3[[1]])
summary(x3[[1]])mean(x3[[2]])
sd(x3[[2]])
var(x3[[2]])
max(x3[[2]])
min(x3[[2]])
summary(x3[[2]])
summary(x3[[3]])set.seed(4)
x4 <- multi_mcmc(2000, 100, "annealing dynamic", 0.2, 250)mean(x4[[1]])
sd(x4[[1]])
var(x4[[1]])
max(x4[[1]])
min(x4[[1]])
summary(x4[[1]])mean(x4[[2]])
sd(x4[[2]])
var(x4[[2]])
max(x4[[2]])
min(x4[[2]])
summary(x4[[2]])set.seed(49)
x5 <- multi_mcmc(2000, 100, "annealing", 0.8, 250)mean(x5[[1]])
sd(x5[[1]])
var(x5[[1]])
max(x5[[1]])
min(x5[[1]])
summary(x5[[1]])mean(x5[[2]])
sd(x5[[2]])
var(x5[[2]])
max(x5[[2]])
min(x5[[2]])
summary(x5[[2]])
summary(x5[[3]])set.seed(49)
x6 <- multi_mcmc(2000, 100, "delayed", 0.2, 250)mean(x6[[1]])
sd(x6[[1]])
var(x6[[1]])
max(x6[[1]])
min(x6[[1]])
summary(x6[[1]])mean(x6[[2]])
sd(x6[[2]])
var(x6[[2]])
max(x6[[2]])
min(x6[[2]])
summary(x6[[2]])
summary(x6[[3]])set.seed(49)
x7 <- multi_mcmc(1000, 1000, "annealing", 0.8, 250)mean(x7[[1]])
sd(x7[[1]])
var(x7[[1]])
max(x7[[1]])
min(x7[[1]])
summary(x7[[1]])mean(x7[[2]])
sd(x7[[2]])
var(x7[[2]])
max(x7[[2]])
min(x7[[2]])
summary(x7[[2]])
summary(x7[[3]])test_grid1 <- matrix(c("x", "Bee", "Meadow", "x", "x", "x", "Meadow", "Meadow", "x", "x",
"Deer", "Bee", "Meadow", "x", "x", "x", "Bear", "Bee", "Deer",
"x"),4,5,byrow=TRUE)
test_grid1 [,1] [,2] [,3] [,4] [,5]
[1,] "x" "Bee" "Meadow" "x" "x"
[2,] "x" "Meadow" "Meadow" "x" "x"
[3,] "Deer" "Bee" "Meadow" "x" "x"
[4,] "x" "Bear" "Bee" "Deer" "x"
set.seed(4)
partial1 <- multi_mcmc(1000, 100, "annealing", 0.8, 200, test_grid1)mean(partial1[[1]])
sd(partial1[[1]])
var(partial1[[1]])
max(partial1[[1]])
min(partial1[[1]])
summary(partial1[[1]])mean(partial1[[2]])
sd(partial1[[2]])
var(partial1[[2]])
max(partial1[[2]])
min(partial1[[2]])
summary(partial1[[2]])
summary(partial1[[3]])tune <- function(iter, beta, bp, type, boardlist = NULL){
beta_df <- c()
bp_df <- c()
iter_df <- c()
start_score <- c()
highest_score <- c()
iter_at_highest <- c()
highest_score_sd <- c()
highest_iter_sd <- c()
max_highest <- c()
percentile_90 <- c()
score_75_plus <- c()
for(i in beta){
for (j in bp){
for(k in iter){
if(is.null(boardlist) == FALSE){
sim <- multi_mcmc(k, 100, type, i, j, boardlist = boardlist)
}else{
sim <- multi_mcmc(k, 100, type, i, j)
}
beta_df <- c(beta_df, i)
bp_df <- c(bp_df, j)
iter_df <- c(iter_df, k)
start_score <- c(start_score, mean(sim[[1]]))
highest_score <- c(highest_score, mean(sim[[2]]))
iter_at_highest <- c(iter_at_highest, mean(sim[[3]]))
highest_iter_sd <- c(highest_iter_sd, sd(sim[[3]]))
highest_score_sd <- c(highest_score_sd, sd(sim[[2]]))
max_highest <- c(max_highest, max(sim[[2]]))
percentile_90 <- c(percentile_90, quantile(sim[[2]],probs=0.9))
s <- sim[[2]]
score_75_plus <- c(score_75_plus, length(s[s>=75]))
}
}
}
df <- data.frame(
iterations = iter_df,
beta = beta_df,
break_point = bp_df,
mean_start_score = start_score,
mean_highest_score = highest_score,
sd_highest_score = highest_score_sd,
max_score = max_highest,
score_90th_percent = percentile_90,
score_75_plus = score_75_plus,
average_iterations = iter_at_highest,
sd_iterations = highest_iter_sd,
type = type
)
return(df)
}beta <- c(0.3,0.8,0.9)
bp <- c(125, 250)
iter <- c(500, 750)
set.seed(4)
tune1 <- tune(iter, beta, bp, "delayed")tune1generate_100 <- function(pool){
board_list <- list()
for(i in 1:100){
board <- generate_grid(pool)
board_list[[length(board_list)+1]] = board
}
return(board_list)
}# set.seed(88)
set.seed(89)
board100 <- generate_100(cards)tune_exact1 <- tune(1000, 0.99, 250, "annealing dynamic", boardlist = board100)
tune_exact2 <- tune(1000, 0.9, 200, "annealing dynamic", boardlist = board100)
tune_exact3 <- tune(2000, 0.7, 500, "delayed", boardlist = board100)
tune_exact4 <- tune(2000, 0.8, 500, "annealing dynamic", boardlist = board100)
tune_exact5 <- tune(1500, 0.9, 500, "annealing dynamic", boardlist = board100)
tune_exact6 <- tune(750, 0.99, 250, "annealing dynamic", boardlist = board100)
tune_exact7 <- tune(750, 0.3, 250, "delayed", boardlist = board100)
tune_exact8 <- tune(500, 0.99, 250, "annealing dynamic", boardlist = board100)
tune_exact9 <- tune(2000, 0.9, 250, "annealing dynamic", boardlist = board100)
tune_exact10 <- tune(750, 0.9, 125, "annealing dynamic", boardlist = board100)
tune_exact11 <- tune(1500, 0.9, 250, "annealing dynamic", boardlist = board100)final_params <- rbind(tune_exact1,
tune_exact2,
tune_exact3,
tune_exact4,
tune_exact5,
tune_exact6,
tune_exact7,
tune_exact8,
tune_exact9,
tune_exact10,
tune_exact11)Error: object 'tune_exact1' not found
write.csv(final_params,here::here("final-parameters-seed2.csv"), row.names = FALSE)Error in eval(expr, p): object 'final_params' not found
# write.csv(tune1,here::here("new-parameters.csv"),row.names = FALSE)tuned_params1 <- read.csv(here::here("final-parameters.csv"))new_params <- rbind(tuned_params1, tune1)
write.csv(new_params,here::here("new-parameters.csv"), row.names = FALSE)set.seed(45)
sim_grid1 <- generate_grid(cards)
x <- rw_mcmc(sim_grid1, 2000, "annealing dynamic", beta = 0.8, 250)xz <- data.frame(iter = rep(1:length(x[[4]])), scores = x[[4]])
ggplot(aes(x = iter, y = scores), data = xz) +
geom_line()cards <- c(rep("Bear", 12),
rep("Bee", 8),
rep("Meadow", 20),
rep("Trout", 10),
rep("Eagle", 8),
rep("Rabbit", 8),
rep("Dragonfly", 8),
rep("Fox", 12),
rep("Deer", 12),
rep("Stream", 20),
rep("Wolf", 12)
)
dfly_stream <- c(rep("Bear", 6),
rep("Bee", 4),
rep("Meadow", 10),
rep("Trout", 5),
rep("Eagle", 4),
rep("Rabbit", 4),
rep("Dragonfly", 8),
rep("Fox", 6),
rep("Deer", 6),
rep("Stream", 20),
rep("Wolf", 6)
)
bee_meadow <- c(rep("Bear", 6),
rep("Bee", 8),
rep("Meadow", 20),
rep("Trout", 5),
rep("Eagle", 4),
rep("Rabbit", 4),
rep("Dragonfly", 4),
rep("Fox", 6),
rep("Deer", 6),
rep("Stream", 10),
rep("Wolf", 6)
)
low_eag_rab <- c(rep("Bear", 12),
rep("Bee", 8),
rep("Meadow", 20),
rep("Trout", 10),
rep("Eagle", 2),
rep("Rabbit", 2),
rep("Dragonfly", 8),
rep("Fox", 12),
rep("Deer", 12),
rep("Stream", 20),
rep("Wolf", 12)
)startTime <- Sys.time()
db_gen <- multi_mcmc(1000, 2000, "annealing dynamic", 0.9, 200, record_board = TRUE, cards = low_eag_rab, card_name = "low_eagle_rabbit")
endTime <- Sys.time()
print(endTime - startTime)
database_old <- read.csv(here::here("database.csv"))
database_new <- rbind(database_old, db_gen)
database_new <- database_new %>% distinct()
write.csv(database_new, here::here("database.csv"), row.names = FALSE)head(db_gen)database_new <- database_new %>% mutate(score = as.numeric(score))Error: object 'database_new' not found
# write.csv(db_gen, here::here("database.csv"), row.names = FALSE)database_old <- read.csv(here::here("database.csv"))database_new <- rbind(database_old, db_gen)
write.csv(database_new, here::here("database.csv"), row.names = FALSE)database_new <- database_new %>% distinct()database_new <- read.csv(here::here("database.csv"))database_new %>%
mutate(score = as.numeric(score)) %>%
filter(pool == "default") %>%
ggplot(aes(x = score)) +
geom_histogram(binwidth = 3, fill = "steelblue", color = "black")database_new %>%
mutate(score = as.numeric(score)) %>%
filter(pool == "dragonfly_stream") %>%
ggplot(aes(x = score)) +
geom_histogram(binwidth = 3, fill = "steelblue", color = "black")database_new %>%
mutate(score = as.numeric(score)) %>%
filter(pool == "bee_meadow") %>%
ggplot(aes(x = score)) +
geom_histogram(binwidth = 3, fill = "steelblue", color = "black")database_new %>%
mutate(score = as.numeric(score)) %>%
filter(pool == "low_eagle_rabbit") %>%
ggplot(aes(x = score)) +
geom_histogram(binwidth = 3, fill = "steelblue", color = "black")# database_new <- read.csv(here::here("database.csv"))
#
# database_new_ID <- database_new %>%
# mutate(ID = rep(1:50000)) %>%
# select(ID, 1:22)
#
# write.csv(database_new_ID, here::here("database.csv"), row.names = FALSE)database_new <- read.csv(here::here("database.csv"))grids <- database_new %>% select(-c(ID, pool))
dmy <- dummyVars(" ~ .", data = grids)
grids <- data.frame(predict(dmy, newdata = grids))
grids_matrix <- as.matrix(grids)set.seed(4)
km_spec1 <- k_means(num_clusters = 4)
grids_recipe <- recipe(~., data = grids_matrix)
km_wflow1 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec1)
km_fitted1 <- km_wflow1 |> fit(grids_matrix)
km_fitted1 |> extract_centroids()# A tibble: 4 × 222
.cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster… 0.0700 0.0306 0.127 0.0474 0.0341
2 Cluster… 0.0520 0.0443 0.137 0.0810 0.0381
3 Cluster… 0.0958 0.0210 0.114 0.0341 0.0352
4 Cluster… 0.142 0.0121 0.108 0.0265 0.0535
# ℹ 216 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
# row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
# row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
# row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
# row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
# row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
# row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
grids_km1 <- kmeans(grids_matrix, centers = 4)
grids_km1$totss[1] 5052189
grids_km1$withinss[1] 371820.1 459162.4 448740.3 210847.5
grids_km1$betweenss[1] 3561619
x <- km_fitted1 |> extract_centroids()
x# A tibble: 4 × 222
.cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster… 0.0700 0.0306 0.127 0.0474 0.0341
2 Cluster… 0.0520 0.0443 0.137 0.0810 0.0381
3 Cluster… 0.0958 0.0210 0.114 0.0341 0.0352
4 Cluster… 0.142 0.0121 0.108 0.0265 0.0535
# ℹ 216 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
# row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
# row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
# row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
# row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
# row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
# row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
pc <- prcomp(grids_matrix)cumul_vars <- cumsum(pc$sdev^2)/sum(pc$sdev^2)
cumul_vars [1] 0.8262699 0.8344343 0.8401506 0.8447357 0.8481400 0.8512645 0.8543596
[8] 0.8571382 0.8597179 0.8621789 0.8646002 0.8669165 0.8691075 0.8712399
[15] 0.8733117 0.8753313 0.8773469 0.8793509 0.8812528 0.8830664 0.8848660
[22] 0.8866272 0.8883736 0.8900891 0.8917286 0.8933377 0.8949434 0.8965283
[29] 0.8979800 0.8994247 0.9008390 0.9022289 0.9035779 0.9049160 0.9062025
[36] 0.9074680 0.9087246 0.9099753 0.9111725 0.9123417 0.9135056 0.9146304
[43] 0.9157528 0.9168553 0.9179437 0.9190205 0.9200832 0.9211415 0.9221797
[50] 0.9232034 0.9242053 0.9252057 0.9261971 0.9271803 0.9281464 0.9291076
[57] 0.9300644 0.9310161 0.9319635 0.9328935 0.9338167 0.9347370 0.9356410
[64] 0.9365347 0.9374233 0.9382853 0.9391459 0.9399874 0.9408102 0.9416306
[71] 0.9424384 0.9432442 0.9440263 0.9447970 0.9455672 0.9463304 0.9470871
[78] 0.9478354 0.9485749 0.9493072 0.9500344 0.9507528 0.9514698 0.9521774
[85] 0.9528801 0.9535817 0.9542721 0.9549544 0.9556341 0.9563076 0.9569774
[92] 0.9576409 0.9582959 0.9589498 0.9596002 0.9602391 0.9608735 0.9615018
[99] 0.9621293 0.9627451 0.9633553 0.9639575 0.9645549 0.9651397 0.9657195
[106] 0.9662952 0.9668654 0.9674312 0.9679949 0.9685427 0.9690825 0.9696196
[113] 0.9701484 0.9706720 0.9711924 0.9717052 0.9722126 0.9727169 0.9732185
[120] 0.9737144 0.9742058 0.9746965 0.9751795 0.9756567 0.9761317 0.9766042
[127] 0.9770729 0.9775393 0.9780009 0.9784592 0.9789082 0.9793500 0.9797902
[134] 0.9802228 0.9806544 0.9810817 0.9815054 0.9819246 0.9823391 0.9827525
[141] 0.9831575 0.9835572 0.9839513 0.9843437 0.9847296 0.9851109 0.9854880
[148] 0.9858575 0.9862220 0.9865786 0.9869344 0.9872848 0.9876326 0.9879766
[155] 0.9883156 0.9886516 0.9889831 0.9893118 0.9896365 0.9899509 0.9902635
[162] 0.9905738 0.9908800 0.9911832 0.9914812 0.9917689 0.9920532 0.9923336
[169] 0.9926123 0.9928876 0.9931610 0.9934310 0.9936987 0.9939637 0.9942273
[176] 0.9944882 0.9947476 0.9950056 0.9952613 0.9955154 0.9957629 0.9960072
[183] 0.9962475 0.9964865 0.9967241 0.9969598 0.9971932 0.9974256 0.9976518
[190] 0.9978744 0.9980914 0.9983073 0.9985192 0.9987288 0.9989364 0.9991387
[197] 0.9993293 0.9995011 0.9996693 0.9998363 1.0000000 1.0000000 1.0000000
[204] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
[211] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
[218] 1.0000000 1.0000000 1.0000000 1.0000000
grids_reduced <- pc$x[, 1:8]
grids_pca_km <- kmeans(grids_reduced, 4)
grids_pca_km$totss[1] 4330425
grids_pca_km$withinss[1] 269715.5 127910.6 182482.4 189124.9
grids_pca_km$betweenss[1] 3561191
pc$rotation[,1:2] PC1 PC2
row1col1Bear 2.649657e-03 1.104530e-02
row1col1Bee -1.158698e-03 3.047751e-02
row1col1Deer -9.608389e-04 2.295259e-04
row1col1Dragonfly -2.468062e-03 -4.682377e-02
row1col1Eagle 1.824416e-04 -1.391958e-02
row1col1Fox 6.559876e-04 -8.397230e-03
row1col1Meadow -1.617880e-03 1.233143e-01
row1col1Rabbit -5.718403e-04 -1.134916e-02
row1col1Stream 2.717291e-05 -8.011985e-02
row1col1Trout -6.903208e-04 -1.501071e-02
row1col1Wolf 3.952382e-03 1.055361e-02
row1col2Bear 1.916023e-03 1.010461e-02
row1col2Bee -1.351101e-03 4.262760e-02
row1col2Deer 3.924640e-04 5.558147e-03
row1col2Dragonfly -2.072762e-03 -3.816086e-02
row1col2Eagle -3.175132e-04 -1.769564e-02
row1col2Fox 1.362087e-03 1.408401e-04
row1col2Meadow -8.005904e-04 1.321158e-01
row1col2Rabbit 1.037345e-05 4.010191e-03
row1col2Stream -6.673661e-04 -1.156329e-01
row1col2Trout -1.497311e-03 -3.164247e-02
row1col2Wolf 3.025695e-03 8.574621e-03
row1col3Bear 1.679487e-03 8.169932e-04
row1col3Bee -1.244730e-03 1.390651e-03
row1col3Deer 7.720037e-04 -1.018956e-03
row1col3Dragonfly -2.557329e-03 -2.445883e-03
row1col3Eagle -4.733135e-04 -1.393656e-03
row1col3Fox 1.345418e-03 -1.489911e-03
row1col3Meadow 2.545113e-04 4.632341e-03
row1col3Rabbit 6.186758e-05 1.915093e-03
row1col3Stream -4.852718e-04 -4.707403e-03
row1col3Trout -1.951277e-03 6.158401e-04
row1col3Wolf 2.598634e-03 1.684892e-03
row1col4Bear 2.293700e-03 -4.412166e-03
row1col4Bee -1.396043e-03 -4.126496e-02
row1col4Deer 2.400714e-04 -6.394343e-03
row1col4Dragonfly -2.260096e-03 3.855945e-02
row1col4Eagle -5.383542e-04 1.862509e-02
row1col4Fox 1.271799e-03 -4.512922e-03
row1col4Meadow -6.018285e-04 -1.238800e-01
row1col4Rabbit 1.756345e-05 -4.800012e-03
row1col4Stream -3.031215e-04 1.047287e-01
row1col4Trout -1.519027e-03 3.031563e-02
row1col4Wolf 2.795336e-03 -6.964540e-03
row1col5Bear 2.703025e-03 -8.055195e-03
row1col5Bee -1.288867e-03 -2.940842e-02
row1col5Deer -1.328770e-03 -4.375605e-03
row1col5Dragonfly -2.169101e-03 4.491265e-02
row1col5Eagle 3.252771e-04 1.412125e-02
row1col5Fox 8.465991e-04 4.170017e-04
row1col5Meadow -1.485325e-03 -1.164149e-01
row1col5Rabbit -5.874975e-04 1.146956e-02
row1col5Stream -1.465722e-05 7.826931e-02
row1col5Trout -6.973340e-04 1.448302e-02
row1col5Wolf 3.696651e-03 -5.418622e-03
row2col1Bear 2.127058e-03 5.629902e-03
row2col1Bee -1.432086e-03 6.459046e-02
row2col1Deer 4.460080e-04 3.478263e-03
row2col1Dragonfly -2.224559e-03 -5.989847e-02
row2col1Eagle -3.594363e-04 -1.895993e-02
row2col1Fox 1.180679e-03 -4.479414e-03
row2col1Meadow -1.922216e-03 2.309633e-01
row2col1Rabbit 1.777383e-04 -2.139114e-03
row2col1Stream 9.351417e-05 -1.713423e-01
row2col1Trout -1.351358e-03 -5.142237e-02
row2col1Wolf 3.264658e-03 3.579621e-03
row2col2Bear 1.708217e-03 1.630698e-02
row2col2Bee -1.913927e-03 1.047215e-01
row2col2Deer 9.395536e-04 7.002407e-03
row2col2Dragonfly -1.412518e-03 -3.290655e-02
row2col2Eagle -3.618640e-04 -5.414834e-03
row2col2Fox 9.453663e-04 3.378233e-03
row2col2Meadow 6.454030e-04 1.975911e-01
row2col2Rabbit 6.528876e-04 2.153125e-03
row2col2Stream -7.607296e-04 -2.054084e-01
row2col2Trout -2.107437e-03 -9.332735e-02
row2col2Wolf 1.665048e-03 5.903820e-03
row2col3Bear 1.120035e-03 9.049514e-04
row2col3Bee -7.339252e-04 1.001967e-03
row2col3Deer 8.005560e-04 1.106805e-03
row2col3Dragonfly -1.715858e-03 -5.495156e-04
row2col3Eagle -3.648529e-04 -1.274841e-03
row2col3Fox 9.186916e-04 1.131443e-04
row2col3Meadow 1.179412e-03 1.194301e-03
row2col3Rabbit 6.406921e-04 7.522064e-04
row2col3Stream 2.614248e-06 -2.750386e-03
row2col3Trout -3.304149e-03 7.044022e-05
row2col3Wolf 1.456783e-03 -5.690719e-04
row2col4Bear 1.587036e-03 -1.501841e-02
row2col4Bee -1.652151e-03 -1.043086e-01
row2col4Deer 1.093581e-03 -5.677750e-03
row2col4Dragonfly -1.459588e-03 3.433253e-02
row2col4Eagle -4.694642e-04 5.616182e-03
row2col4Fox 8.778038e-04 -5.249001e-03
row2col4Meadow 3.151903e-04 -1.926665e-01
row2col4Rabbit 5.667673e-04 -3.256551e-03
row2col4Stream -3.017283e-04 1.999165e-01
row2col4Trout -2.217066e-03 9.236426e-02
row2col4Wolf 1.659619e-03 -6.052632e-03
row2col5Bear 2.277521e-03 -8.221136e-03
row2col5Bee -1.541476e-03 -6.234644e-02
row2col5Deer 6.546044e-04 -5.879182e-03
row2col5Dragonfly -2.191282e-03 5.814561e-02
row2col5Eagle -5.182556e-04 2.192808e-02
row2col5Fox 1.272641e-03 1.833105e-03
row2col5Meadow -1.858605e-03 -2.237812e-01
row2col5Rabbit 1.254353e-04 2.580762e-03
row2col5Stream -1.847185e-04 1.730932e-01
row2col5Trout -1.265220e-03 4.835563e-02
row2col5Wolf 3.229356e-03 -5.708395e-03
row3col1Bear 2.306924e-03 7.994807e-03
row3col1Bee -1.531752e-03 6.323926e-02
row3col1Deer 6.647579e-04 4.154626e-03
row3col1Dragonfly -2.151379e-03 -5.965675e-02
row3col1Eagle -3.621139e-04 -1.979888e-02
row3col1Fox 1.461167e-03 -3.351028e-03
row3col1Meadow -2.211332e-03 2.263729e-01
row3col1Rabbit 1.844129e-04 -3.549851e-03
row3col1Stream 9.165953e-05 -1.693116e-01
row3col1Trout -1.544134e-03 -5.122980e-02
row3col1Wolf 3.091790e-03 5.136352e-03
row3col2Bear 1.494113e-03 1.384646e-02
row3col2Bee -1.922189e-03 1.024542e-01
row3col2Deer 1.021120e-03 5.366398e-03
row3col2Dragonfly -1.335315e-03 -3.549629e-02
row3col2Eagle -2.601684e-04 -3.636211e-03
row3col2Fox 1.022202e-03 3.384010e-03
row3col2Meadow 2.588722e-04 1.942868e-01
row3col2Rabbit 6.551038e-04 2.739532e-03
row3col2Stream -8.501485e-04 -2.012267e-01
row3col2Trout -1.840446e-03 -8.781703e-02
row3col2Wolf 1.756857e-03 6.098867e-03
row3col3Bear 1.108852e-03 4.812706e-03
row3col3Bee -7.967926e-04 -3.810522e-03
row3col3Deer 1.090767e-03 6.762310e-04
row3col3Dragonfly -2.016226e-03 1.083598e-03
row3col3Eagle -3.650644e-04 9.184114e-04
row3col3Fox 8.572269e-04 7.106153e-05
row3col3Meadow 1.166387e-03 -5.475146e-03
row3col3Rabbit 6.474442e-04 2.930367e-04
row3col3Stream -1.779278e-04 4.916941e-03
row3col3Trout -3.143275e-03 -3.602121e-03
row3col3Wolf 1.628609e-03 1.158037e-04
row3col4Bear 1.791634e-03 -1.923391e-02
row3col4Bee -1.713301e-03 -1.000108e-01
row3col4Deer 1.050551e-03 -6.882085e-03
row3col4Dragonfly -1.401983e-03 3.529881e-02
row3col4Eagle -3.710658e-04 3.067195e-03
row3col4Fox 7.634127e-04 -3.402387e-03
row3col4Meadow 2.989192e-04 -2.010339e-01
row3col4Rabbit 6.703199e-04 -3.098337e-03
row3col4Stream -6.369771e-04 2.036320e-01
row3col4Trout -2.030981e-03 9.703521e-02
row3col4Wolf 1.579472e-03 -5.371776e-03
row3col5Bear 2.309500e-03 -7.464893e-03
row3col5Bee -1.536044e-03 -6.461880e-02
row3col5Deer 8.185540e-04 -5.640715e-03
row3col5Dragonfly -2.008123e-03 5.880310e-02
row3col5Eagle -3.064221e-04 1.935974e-02
row3col5Fox 1.161713e-03 4.000830e-03
row3col5Meadow -2.022591e-03 -2.260417e-01
row3col5Rabbit 1.086869e-04 2.792596e-03
row3col5Stream 1.751042e-04 1.762938e-01
row3col5Trout -1.621772e-03 4.963707e-02
row3col5Wolf 2.921395e-03 -7.121022e-03
row4col1Bear 2.633496e-03 9.914914e-03
row4col1Bee -1.185664e-03 2.831810e-02
row4col1Deer -1.123767e-03 1.053746e-02
row4col1Dragonfly -2.384150e-03 -4.721670e-02
row4col1Eagle 7.613280e-05 -1.316498e-02
row4col1Fox 9.946044e-04 -7.134276e-03
row4col1Meadow -1.571362e-03 1.135271e-01
row4col1Rabbit -5.519641e-04 -8.784153e-03
row4col1Stream 2.539753e-04 -7.891155e-02
row4col1Trout -6.801618e-04 -1.481484e-02
row4col1Wolf 3.538860e-03 7.728876e-03
row4col2Bear 1.783397e-03 1.000896e-02
row4col2Bee -1.124139e-03 3.823066e-02
row4col2Deer 3.061706e-04 9.272132e-03
row4col2Dragonfly -2.381098e-03 -3.676371e-02
row4col2Eagle -1.474549e-04 -1.947806e-02
row4col2Fox 1.152432e-03 4.065746e-03
row4col2Meadow -8.480964e-04 1.211754e-01
row4col2Rabbit 1.170003e-04 4.737714e-03
row4col2Stream 5.194001e-05 -1.107003e-01
row4col2Trout -1.675275e-03 -3.201130e-02
row4col2Wolf 2.765124e-03 1.146282e-02
row4col3Bear 1.902913e-03 -1.938076e-03
row4col3Bee -1.306573e-03 -2.242984e-03
row4col3Deer 8.238128e-04 3.766445e-03
row4col3Dragonfly -2.607222e-03 1.098951e-03
row4col3Eagle -7.209925e-04 -3.601229e-05
row4col3Fox 1.113900e-03 -1.795936e-03
row4col3Meadow -9.656987e-05 -4.379197e-03
row4col3Rabbit -6.056850e-06 -1.727882e-03
row4col3Stream 9.610697e-05 2.819636e-03
row4col3Trout -1.935461e-03 2.270058e-03
row4col3Wolf 2.736143e-03 2.164997e-03
row4col4Bear 1.667520e-03 -6.277657e-03
row4col4Bee -1.295554e-03 -4.534049e-02
row4col4Deer 9.883757e-05 -1.041393e-02
row4col4Dragonfly -1.969235e-03 3.747668e-02
row4col4Eagle -4.116733e-04 1.855025e-02
row4col4Fox 8.728438e-04 -2.910751e-03
row4col4Meadow -4.633006e-04 -1.279565e-01
row4col4Rabbit 3.175449e-05 -4.498959e-03
row4col4Stream -7.954553e-05 1.200424e-01
row4col4Trout -1.591449e-03 3.109297e-02
row4col4Wolf 3.139802e-03 -9.763951e-03
row4col5Bear 2.554251e-03 -1.217946e-02
row4col5Bee -1.244892e-03 -2.987918e-02
row4col5Deer -8.923392e-04 -9.173750e-03
row4col5Dragonfly -2.406837e-03 4.944505e-02
row4col5Eagle -4.276275e-05 1.466232e-02
row4col5Fox 7.774360e-04 1.135927e-02
row4col5Meadow -1.579138e-03 -1.188984e-01
row4col5Rabbit -5.172192e-04 8.419076e-03
row4col5Stream 2.506626e-04 8.309288e-02
row4col5Trout -6.438973e-04 1.526172e-02
row4col5Wolf 3.744735e-03 -1.210952e-02
score -9.997436e-01 2.358008e-06
# No lowest
grids <- database_new %>%
filter(score > 61) %>%
select(-c(ID, pool))
dmy <- dummyVars(" ~ .", data = grids)
grids <- data.frame(predict(dmy, newdata = grids))grids_noscore <- grids %>%
select(-c(score))
noscore_matrix <- as.matrix(grids_noscore)# write.csv(grids_noscore, here::here("grids_noscore.csv"), row.names = FALSE)set.seed(4)
km_spec2 <- k_means(num_clusters = 3)
grids_recipe <- recipe(~., data = noscore_matrix)
km_wflow2 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec2)
km_fitted2 <- km_wflow2 |> fit(noscore_matrix)
km_fitted2 |> extract_centroids()# A tibble: 3 × 221
.cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster… 0.0693 0.0704 0.127 0.0144 0.0188
2 Cluster… 0.0530 0.00263 0.0878 0.142 0.0606
3 Cluster… 0.0612 0.0178 0.183 0.0536 0.0371
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
# row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
# row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
# row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
# row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
# row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
# row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
grids_km2 <- kmeans(noscore_matrix, centers = 3)
grids_km2$totss[1] 634714.3
grids_km2$withinss[1] 168666.4 262535.6 166322.3
grids_km2$betweenss[1] 37189.92
set.seed(4)
km_spec2 <- k_means(num_clusters = 4)
grids_recipe <- recipe(~., data = noscore_matrix)
km_wflow2 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec2)
km_fitted2 <- km_wflow2 |> fit(noscore_matrix)
km_fitted2 |> extract_centroids()# A tibble: 4 × 221
.cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster… 0.0637 0.114 0.0669 0.00231 0.00760
2 Cluster… 0.0742 0.0192 0.189 0.0397 0.0342
3 Cluster… 0.0533 0.00310 0.0816 0.143 0.0616
4 Cluster… 0.0585 0.00865 0.190 0.0613 0.0398
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
# row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
# row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
# row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
# row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
# row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
# row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
grids_km2 <- kmeans(noscore_matrix, centers = 4)
grids_km2$totss[1] 634714.3
grids_km2$withinss[1] 147510.1 146907.5 147161.9 145439.2
grids_km2$betweenss[1] 47695.6
set.seed(4)
km_spec2 <- k_means(num_clusters = 5)
grids_recipe <- recipe(~., data = noscore_matrix)
km_wflow2 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec2)
km_fitted2 <- km_wflow2 |> fit(noscore_matrix)
km_fitted2 |> extract_centroids()# A tibble: 5 × 221
.cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster… 0.0624 0.116 0.0658 0.00180 0.00732
2 Cluster… 0.0749 0.0193 0.190 0.0386 0.0335
3 Cluster… 0.0551 0.00358 0.0798 0.144 0.0626
4 Cluster… 0.0856 0.00599 0.155 0.0837 0.0255
5 Cluster… 0.0238 0.0116 0.202 0.0538 0.0612
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
# row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
# row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
# row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
# row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
# row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
# row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
grids_km2 <- kmeans(noscore_matrix, centers = 5)
grids_km2$totss[1] 634714.3
grids_km2$withinss[1] 76104.60 96203.87 143008.40 124196.79 141839.81
grids_km2$betweenss[1] 53360.8
set.seed(4)
km_spec2 <- k_means(num_clusters = 6)
grids_recipe <- recipe(~., data = noscore_matrix)
km_wflow2 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec2)
km_fitted2 <- km_wflow2 |> fit(noscore_matrix)
km_fitted2 |> extract_centroids()# A tibble: 6 × 221
.cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster… 0.0606 0.118 0.0626 0.00174 0.00741
2 Cluster… 0.0772 0.0195 0.192 0.0391 0.0325
3 Cluster… 0.0649 0.00154 0.0890 0.158 0.0254
4 Cluster… 0.0906 0.00749 0.167 0.0705 0.0294
5 Cluster… 0.0375 0.00441 0.0827 0.123 0.0915
6 Cluster… 0.0263 0.0137 0.210 0.0493 0.0476
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
# row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
# row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
# row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
# row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
# row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
# row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
grids_km2 <- kmeans(noscore_matrix, centers = 6)
grids_km2$totss[1] 634714.3
grids_km2$withinss[1] 140257.59 85086.88 138441.52 88224.11 63939.32 60330.98
grids_km2$betweenss[1] 58433.85
set.seed(4)
km_spec2 <- k_means(num_clusters = 7)
grids_recipe <- recipe(~., data = noscore_matrix)
km_wflow2 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec2)
km_fitted2 <- km_wflow2 |> fit(noscore_matrix)
km_fitted2 |> extract_centroids()# A tibble: 7 × 221
.cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster… 0.00924 0.162 0.0645 0.00250 0.00475
2 Cluster… 0.102 0.0107 0.167 0.0437 0.0199
3 Cluster… 0.119 0.0696 0.0698 0.00440 0.00880
4 Cluster… 0.0630 0.00131 0.0884 0.152 0.0256
5 Cluster… 0.0620 0.0100 0.205 0.0532 0.0389
6 Cluster… 0.0390 0.00363 0.0852 0.122 0.0871
7 Cluster… 0.0438 0.0297 0.203 0.0433 0.0457
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
# row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
# row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
# row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
# row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
# row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
# row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
grids_km2 <- kmeans(noscore_matrix, centers = 7)
grids_km2$totss[1] 634714.3
grids_km2$withinss[1] 72114.74 102254.12 113256.43 72311.59 61848.85 87841.65 62295.00
grids_km2$betweenss[1] 62791.89
km_fitted2 %>% extract_cluster_assignment()# A tibble: 36,066 × 1
.cluster
<fct>
1 Cluster_1
2 Cluster_2
3 Cluster_3
4 Cluster_2
5 Cluster_4
6 Cluster_2
7 Cluster_2
8 Cluster_5
9 Cluster_6
10 Cluster_2
# ℹ 36,056 more rows
database_lowest_removed <- database_new %>%
filter(score > 61)database1 <- database_lowest_removed %>%
mutate(cluster = extract_cluster_assignment(km_fitted2)$.cluster)database1 %>%
group_by(cluster) %>%
summarise(mean_score=mean(score),
sd_score=sd(score),
count=n(),
.groups = 'drop')# A tibble: 7 × 4
cluster mean_score sd_score count
<fct> <dbl> <dbl> <int>
1 Cluster_1 70.4 5.76 4003
2 Cluster_2 70.4 5.88 3822
3 Cluster_3 69.9 5.58 5227
4 Cluster_4 70.2 5.71 4571
5 Cluster_5 70.2 5.75 7293
6 Cluster_6 70.0 5.70 5788
7 Cluster_7 69.9 5.71 5362
summary(database_new$score) Min. 1st Qu. Median Mean 3rd Qu. Max.
17.00 61.00 67.00 65.79 72.00 116.00
database_new3 <- database_new %>%
filter(score > 61) %>%
select(-c(ID,pool,score))
ID_cols <- database_new %>%
filter(score > 61) %>%
select(ID)# Run once
first_row <- score_grid(matrix(c(t(database_new3[1,])),nrow=4,ncol=5,byrow=T), individual=TRUE)
db_individual <- data.frame(
bear_score = c(first_row[1]),
bee_score = c(first_row[2]),
meadow_score = c(first_row[3]),
trout_score = c(first_row[4]),
eagle_score = c(first_row[5]),
rabbit_score = c(first_row[6]),
dragonfly_score = c(first_row[7]),
fox_score = c(first_row[8]),
deer_score = c(first_row[9]),
stream_score = c(first_row[10]),
wolves_score = c(first_row[11]),
dv_score = c(first_row[12])
)
for(i in 2:nrow(database_new3)){
row <- as.list(score_grid(matrix(c(t(database_new3[i,])),nrow=4,ncol=5,byrow=T), individual=TRUE))
db_individual <- rbind(db_individual, row)
}# db_individual_ID <- db_individual %>%
# mutate(ID = ID_cols$ID) %>%
# select(ID, 1:12)
#
# write.csv(db_individual_ID, here::here("db_individual_lowest_removed.csv"), row.names = FALSE)db_individual_removed <- read.csv(here::here("db_individual_lowest_removed.csv"))
db_individual_removed <- db_individual_removed %>%
select(-ID)
individual_matrix <- as.matrix(db_individual_removed)set.seed(4)
km_spec3 <- k_means(num_clusters = 3)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 3 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 6.81 5.21 5.33 6.21 7.83
2 Cluster_2 4.04 4.24 4.82 6.09 3.25
3 Cluster_3 5.27 13.5 13.1 4.24 3.40
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 3)
grids_km3$totss[1] 9516091
grids_km3$withinss[1] 2798274 1538188 1638007
grids_km3$betweenss[1] 3541622
set.seed(4)
km_spec3 <- k_means(num_clusters = 4)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 4 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 5.78 13.2 12.7 4.22 3.11
2 Cluster_2 5.57 5.25 5.48 6.06 3.44
3 Cluster_3 6.85 5.11 5.46 6.41 11.8
4 Cluster_4 3.39 3.75 4.29 6.05 2.98
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 4)
grids_km3$totss[1] 9516091
grids_km3$withinss[1] 1434699 1606572 1554870 761314
grids_km3$betweenss[1] 4158636
set.seed(4)
km_spec3 <- k_means(num_clusters = 5)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 5 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 9.50 7.46 5.14 5.61 3.90
2 Cluster_2 4.58 4.97 5.86 6.17 3.71
3 Cluster_3 4.75 14.0 14.1 4.11 3.37
4 Cluster_4 5.28 4.37 6.04 6.51 14.3
5 Cluster_5 3.40 3.74 4.28 6.04 2.98
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 5)
grids_km3$totss[1] 9516091
grids_km3$withinss[1] 1136207.9 1126869.3 758003.9 1006652.1 927066.5
grids_km3$betweenss[1] 4561291
set.seed(4)
km_spec3 <- k_means(num_clusters = 6)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 6 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 11.6 8.54 4.67 5.89 4.35
2 Cluster_2 4.51 4.90 5.29 6.24 3.70
3 Cluster_3 4.61 14.1 14.1 4.13 3.39
4 Cluster_4 5.31 4.59 5.61 6.50 15.2
5 Cluster_5 4.35 4.61 6.91 5.63 3.96
6 Cluster_6 3.03 3.25 3.81 5.99 2.61
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 6)
grids_km3$totss[1] 9516091
grids_km3$withinss[1] 759440.3 409600.7 747652.9 905431.2 778196.0 1082726.5
grids_km3$betweenss[1] 4833044
set.seed(4)
km_spec3 <- k_means(num_clusters = 7)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 7 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 12.4 7.96 5.03 5.96 4.35
2 Cluster_2 4.54 4.79 5.34 6.24 3.69
3 Cluster_3 4.95 18.9 12.9 3.75 3.14
4 Cluster_4 4.40 8.61 13.9 4.75 4.01
5 Cluster_5 5.36 4.58 5.36 6.51 15.4
6 Cluster_6 4.68 5.33 4.63 5.78 4.02
7 Cluster_7 3.03 3.24 3.80 6.00 2.61
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 7)
grids_km3$totss[1] 9516091
grids_km3$withinss[1] 409399.1 641115.4 720996.2 587674.1 739842.3 869105.4 518338.6
grids_km3$betweenss[1] 5029620
centroids <- km_fitted3 |> extract_centroids()
centroids# A tibble: 7 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 12.4 7.96 5.03 5.96 4.35
2 Cluster_2 4.54 4.79 5.34 6.24 3.69
3 Cluster_3 4.95 18.9 12.9 3.75 3.14
4 Cluster_4 4.40 8.61 13.9 4.75 4.01
5 Cluster_5 5.36 4.58 5.36 6.51 15.4
6 Cluster_6 4.68 5.33 4.63 5.78 4.02
7 Cluster_7 3.03 3.24 3.80 6.00 2.61
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
database_lowest_removed <- database_new %>%
filter(score > 61)database3 <- database_lowest_removed %>%
mutate(cluster = extract_cluster_assignment(km_fitted3)$.cluster)database3 %>%
group_by(cluster) %>%
summarize(mean_score=mean(score),
sd_score=sd(score),
p25th=quantile(score,probs=0.25),
median=quantile(score,probs=0.5),
p75th=quantile(score,probs=0.75),
count=n(),
.groups = 'drop')# A tibble: 7 × 7
cluster mean_score sd_score p25th median p75th count
<fct> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 Cluster_1 67.9 4.07 65 68 71 5086
2 Cluster_2 71.6 5.49 67 71 76 6745
3 Cluster_3 71.8 5.05 68 72 75 3806
4 Cluster_4 69.2 4.65 65 69 73 6752
5 Cluster_5 69.7 4.66 66 69 73 5252
6 Cluster_6 67.1 3.64 64 67 70 6127
7 Cluster_7 79.4 8.09 74 79 85 2298
db_individual_removed <- read.csv(here::here("db_individual_lowest_removed.csv"))
db_individual_removed <- db_individual_removed %>%
select(-ID)db_scaled <- data.frame(lapply(db_individual_removed,scale))individual_matrix <- as.matrix(db_scaled)set.seed(4)
km_spec3 <- k_means(num_clusters = 3)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 3 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 0.0892 0.747 0.734 -0.351 -0.476
2 Cluster_2 -0.317 -0.546 -0.474 0.288 -0.450
3 Cluster_3 0.156 -0.382 -0.425 0.155 0.880
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 3)
grids_km3$totss[1] 432780
grids_km3$withinss[1] 115486.2 125410.6 101487.0
grids_km3$betweenss[1] 90396.15
set.seed(4)
km_spec3 <- k_means(num_clusters = 4)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 4 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 0.565 -0.186 -0.412 0.0645 -0.297
2 Cluster_2 -0.196 1.08 1.17 -0.422 -0.418
3 Cluster_3 -0.158 -0.378 -0.286 0.0755 1.29
4 Cluster_4 -0.464 -0.572 -0.433 0.312 -0.447
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 4)
grids_km3$totss[1] 432780
grids_km3$withinss[1] 72386.03 92085.26 76858.79 77842.34
grids_km3$betweenss[1] 113607.6
set.seed(4)
km_spec3 <- k_means(num_clusters = 5)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 5 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 1.48 0.00374 -0.374 0.197 -0.124
2 Cluster_2 -0.312 -0.322 -0.306 -0.0824 -0.326
3 Cluster_3 -0.277 1.14 1.22 -0.431 -0.415
4 Cluster_4 -0.204 -0.390 -0.276 0.105 1.44
5 Cluster_5 -0.451 -0.582 -0.459 0.353 -0.442
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 5)
grids_km3$totss[1] 432780
grids_km3$withinss[1] 56580.06 70998.24 43048.48 72480.97 60870.76
grids_km3$betweenss[1] 128801.5
set.seed(4)
km_spec3 <- k_means(num_clusters = 6)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 6 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 1.55 0.0217 -0.355 0.187 -0.127
2 Cluster_2 -0.336 -0.410 -0.434 0.235 -0.248
3 Cluster_3 -0.271 1.17 1.24 -0.434 -0.401
4 Cluster_4 -0.174 -0.385 -0.283 0.0957 1.56
5 Cluster_5 -0.250 -0.250 -0.217 -0.156 -0.275
6 Cluster_6 -0.479 -0.645 -0.361 0.335 -0.590
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 6)
grids_km3$totss[1] 432780
grids_km3$withinss[1] 45579.53 38851.64 45487.45 52669.82 45162.10 63515.07
grids_km3$betweenss[1] 141514.4
set.seed(4)
km_spec3 <- k_means(num_clusters = 7)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 7 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 1.66 0.0356 -0.343 0.167 -0.109
2 Cluster_2 -0.152 -0.160 -0.113 -0.0776 -0.204
3 Cluster_3 -0.260 1.26 1.27 -0.453 -0.405
4 Cluster_4 -0.173 -0.382 -0.281 0.0888 1.63
5 Cluster_5 -0.224 -0.219 -0.162 -0.109 -0.240
6 Cluster_6 -0.333 -0.417 -0.451 0.275 -0.249
7 Cluster_7 -0.487 -0.663 -0.389 0.343 -0.593
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 7)
grids_km3$totss[1] 432780
grids_km3$withinss[1] 34441.26 44327.60 48032.11 39527.74 28056.57 43783.01 42281.56
grids_km3$betweenss[1] 152330.2
centroids <- km_fitted3 |> extract_centroids()
centroids# A tibble: 7 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 1.66 0.0356 -0.343 0.167 -0.109
2 Cluster_2 -0.152 -0.160 -0.113 -0.0776 -0.204
3 Cluster_3 -0.260 1.26 1.27 -0.453 -0.405
4 Cluster_4 -0.173 -0.382 -0.281 0.0888 1.63
5 Cluster_5 -0.224 -0.219 -0.162 -0.109 -0.240
6 Cluster_6 -0.333 -0.417 -0.451 0.275 -0.249
7 Cluster_7 -0.487 -0.663 -0.389 0.343 -0.593
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
database_new %>% summarize(mean_score=mean(score),
sd_score=sd(score),
min=quantile(score, probs=0),
p25th=quantile(score,probs=0.25),
median=quantile(score,probs=0.5),
p75th=quantile(score,probs=0.75),
max=quantile(score, probs=1)
) mean_score sd_score min p25th median p75th max
1 65.79404 9.135006 17 61 67 72 116
database_lowest_removed <- database_new %>%
filter(score > 61)database3 <- database_lowest_removed %>%
mutate(`7cluster` = extract_cluster_assignment(km_fitted3)$.cluster)database3 <- database3 %>%
mutate(`3cluster` = extract_cluster_assignment(km_fitted3)$.cluster)# write.csv(database3, here::here("normalized_clusters.csv"), row.names = FALSE)cluster_data <- read.csv(here::here("normalized_clusters.csv"))cluster_data %>%
group_by(X7cluster) %>%
summarize(mean_score=mean(score),
sd_score=sd(score),
p25th=quantile(score,probs=0.25),
median=quantile(score,probs=0.5),
p75th=quantile(score,probs=0.75),
count=n(),
.groups = 'drop')# A tibble: 7 × 7
X7cluster mean_score sd_score p25th median p75th count
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 Cluster_1 68.8 4.44 65 68 72 4887
2 Cluster_2 67.9 4.10 65 67 71 4291
3 Cluster_3 70.9 5.06 67 71 75 6671
4 Cluster_4 70.0 4.80 66 70 73 5405
5 Cluster_5 67.8 4.12 64 67 71 5800
6 Cluster_6 73.2 6.73 68 73 78 5870
7 Cluster_7 72.4 8.29 66 71 77 3142
count_prop <- function(card_name, database){
df_pos <- data.frame(
row1col1 = as.numeric(nrow(database %>% filter(row1col1 == card_name))),
row1col2 = as.numeric(nrow(database %>% filter(row1col2 == card_name))),
row1col3 = as.numeric(nrow(database %>% filter(row1col3 == card_name))),
row1col4 = as.numeric(nrow(database %>% filter(row1col4 == card_name))),
row1col5 = as.numeric(nrow(database %>% filter(row1col5 == card_name))),
row2col1 = as.numeric(nrow(database %>% filter(row2col1 == card_name))),
row2col2 = as.numeric(nrow(database %>% filter(row2col2 == card_name))),
row2col3 = as.numeric(nrow(database %>% filter(row2col3 == card_name))),
row2col4 = as.numeric(nrow(database %>% filter(row2col4 == card_name))),
row2col5 = as.numeric(nrow(database %>% filter(row2col5 == card_name))),
row3col1 = as.numeric(nrow(database %>% filter(row3col1 == card_name))),
row3col2 = as.numeric(nrow(database %>% filter(row3col2 == card_name))),
row3col3 = as.numeric(nrow(database %>% filter(row3col3 == card_name))),
row3col4 = as.numeric(nrow(database %>% filter(row3col4 == card_name))),
row3col5 = as.numeric(nrow(database %>% filter(row3col5 == card_name))),
row4col1 = as.numeric(nrow(database %>% filter(row4col1 == card_name))),
row4col2 = as.numeric(nrow(database %>% filter(row4col2 == card_name))),
row4col3 = as.numeric(nrow(database %>% filter(row4col3 == card_name))),
row4col4 = as.numeric(nrow(database %>% filter(row4col4 == card_name))),
row4col5 = as.numeric(nrow(database %>% filter(row4col5 == card_name)))
)
df_pos_per <- apply(df_pos, 1, function(x) x/sum(x))
row <- c("row1", "row2", "row3", "row4")
col <- c("col1", "col2", "col3", "col4", "col5")
df_hm <- expand.grid(col = col, row = row)
df_hm <- df_hm %>%
mutate(proportion = df_pos_per[,1])
return(df_hm)
}# bear_pos <- data.frame(
# row1col1 = as.numeric(nrow(database_new %>% filter(row1col1 == "Bear"))),
# row1col2 = as.numeric(nrow(database_new %>% filter(row1col2 == "Bear"))),
# row1col3 = as.numeric(nrow(database_new %>% filter(row1col3 == "Bear"))),
# row1col4 = as.numeric(nrow(database_new %>% filter(row1col4 == "Bear"))),
# row1col5 = as.numeric(nrow(database_new %>% filter(row1col5 == "Bear"))),
# row2col1 = as.numeric(nrow(database_new %>% filter(row2col1 == "Bear"))),
# row2col2 = as.numeric(nrow(database_new %>% filter(row2col2 == "Bear"))),
# row2col3 = as.numeric(nrow(database_new %>% filter(row2col3 == "Bear"))),
# row2col4 = as.numeric(nrow(database_new %>% filter(row2col4 == "Bear"))),
# row2col5 = as.numeric(nrow(database_new %>% filter(row2col5 == "Bear"))),
# row3col1 = as.numeric(nrow(database_new %>% filter(row3col1 == "Bear"))),
# row3col2 = as.numeric(nrow(database_new %>% filter(row3col2 == "Bear"))),
# row3col3 = as.numeric(nrow(database_new %>% filter(row3col3 == "Bear"))),
# row3col4 = as.numeric(nrow(database_new %>% filter(row3col4 == "Bear"))),
# row3col5 = as.numeric(nrow(database_new %>% filter(row3col5 == "Bear"))),
# row4col1 = as.numeric(nrow(database_new %>% filter(row4col1 == "Bear"))),
# row4col2 = as.numeric(nrow(database_new %>% filter(row4col2 == "Bear"))),
# row4col3 = as.numeric(nrow(database_new %>% filter(row4col3 == "Bear"))),
# row4col4 = as.numeric(nrow(database_new %>% filter(row4col4 == "Bear"))),
# row4col5 = as.numeric(nrow(database_new %>% filter(row4col5 == "Bear")))
# )
#
# bear_pos_per <- apply(bear_pos, 1, function(x) x/sum(x))
#
# row <- c("row1", "row2", "row3", "row4")
# col <- c("col1", "col2", "col3", "col4", "col5")
# bear_hm <- expand.grid(col = col, row = row)
# bear_hm <- bear_hm %>%
# mutate(proportion = bear_pos_per[,1])make_heatmap <- function(df, title){
ggplot(aes(x=col, y=row, fill=proportion), data=df) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
labs(title=title)
}make_heatmap(count_prop("Bear", database_new), "Bear")make_heatmap(count_prop("Bee", database_new), "Bee")make_heatmap(count_prop("Meadow", database_new), "Meadow")make_heatmap(count_prop("Trout", database_new), "Trout")make_heatmap(count_prop("Eagle", database_new), "Eagle")make_heatmap(count_prop("Rabbit", database_new), "Rabbit")make_heatmap(count_prop("Dragonfly", database_new), "Dragonfly")make_heatmap(count_prop("Fox", database_new), "Fox")make_heatmap(count_prop("Deer", database_new), "Deer")make_heatmap(count_prop("Stream", database_new), "Stream")make_heatmap(count_prop("Wolf", database_new), "Wolf")bind_7clusters <- function(cluster_num){
rbind(count_prop("Bear",
df <- cluster_data %>%
filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Bear"),
count_prop("Bee",
df <- cluster_data %>%
filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Bee"),
count_prop("Meadow",
df <- cluster_data %>%
filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Meadow"),
count_prop("Trout",
df <- cluster_data %>%
filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Trout"),
count_prop("Eagle",
df <- cluster_data %>%
filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Eagle"),
count_prop("Rabbit",
df <- cluster_data %>%
filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Rabbit"),
count_prop("Dragonfly",
df <- cluster_data %>%
filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Dragonfly"),
count_prop("Fox",
df <- cluster_data %>%
filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Fox"),
count_prop("Deer",
df <- cluster_data %>%
filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Deer"),
count_prop("Stream",
df <- cluster_data %>%
filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Stream"),
count_prop("Wolf",
df <- cluster_data %>%
filter(X7cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Wolf")
)
}cluster_position <- rbind(bind_7clusters("Cluster_1"),
bind_7clusters("Cluster_2"),
bind_7clusters("Cluster_3"),
bind_7clusters("Cluster_4"),
bind_7clusters("Cluster_5"),
bind_7clusters("Cluster_6"),
bind_7clusters("Cluster_7")
)ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Bear"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Bear")overall_position <- rbind(count_prop("Bear", database_new) %>% mutate(card = "Bear"),
count_prop("Bee", database_new) %>% mutate(card = "Bee"),
count_prop("Meadow", database_new) %>% mutate(card = "Meadow"),
count_prop("Trout", database_new) %>% mutate(card = "Trout"),
count_prop("Eagle", database_new) %>% mutate(card = "Eagle"),
count_prop("Rabbit", database_new) %>% mutate(card = "Rabbit"),
count_prop("Dragonfly", database_new) %>% mutate(card = "Dragonfly"),
count_prop("Fox", database_new) %>% mutate(card = "Fox"),
count_prop("Deer", database_new) %>% mutate(card = "Deer"),
count_prop("Stream", database_new) %>% mutate(card = "Stream"),
count_prop("Wolf", database_new) %>% mutate(card = "Wolf")
)ggplot(aes(x=col, y=row, fill=proportion), data=(overall_position)) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(card)) +
labs(title="Bear positions across clusters")cluster_table <- function(Xcluster, cluster_num, cluster_data){
df <- cluster_data %>%
filter(
case_when(
Xcluster == 7 ~ X7cluster == cluster_num,
Xcluster == 6 ~ X6cluster == cluster_num,
Xcluster == 5 ~ X5cluster == cluster_num,
Xcluster == 4 ~ X4cluster == cluster_num,
Xcluster == 3 ~ X3cluster == cluster_num
)
) %>%
summarize(mean_score=mean(score),
sd_score=sd(score),
p25th=quantile(score,probs=0.25),
median=quantile(score,probs=0.5),
p75th=quantile(score,probs=0.75),
count=n(),
.groups = 'drop')
return(df)
}cluster_prop <- function(Xcluster, cluster_num, database){
df <- database %>%
filter(
case_when(
Xcluster == 7 ~ X7cluster == cluster_num,
Xcluster == 6 ~ X6cluster == cluster_num,
Xcluster == 5 ~ X5cluster == cluster_num,
Xcluster == 4 ~ X4cluster == cluster_num,
Xcluster == 3 ~ X3cluster == cluster_num
)
)
bear_count = 0
bee_count = 0
meadow_count = 0
trout_count = 0
eagle_count = 0
rabbit_count = 0
dragonfly_count = 0
fox_count = 0
deer_count = 0
stream_count = 0
wolf_count = 0
for (j in 2:21){
for(i in 1:nrow(df)){
if(df[i,j] == "Bear"){
bear_count = bear_count + 1
}else if(df[i,j] == "Bee"){
bee_count = bee_count + 1
}else if(df[i,j] == "Meadow"){
meadow_count = meadow_count + 1
}else if(df[i,j] == "Trout"){
trout_count = trout_count + 1
}else if(df[i,j] == "Eagle"){
eagle_count = eagle_count + 1
}else if(df[i,j] == "Rabbit"){
rabbit_count = rabbit_count + 1
}else if(df[i,j] == "Dragonfly"){
dragonfly_count = dragonfly_count + 1
}else if(df[i,j] == "Fox"){
fox_count = fox_count + 1
}else if(df[i,j] == "Deer"){
deer_count = deer_count + 1
}else if(df[i,j] == "Stream"){
stream_count = stream_count + 1
}else if(df[i,j] == "Wolf"){
wolf_count = wolf_count + 1
}else{
print("bugged")
}
}
}
df_prop <- data.frame(
name = c("bear", "bee", "meadow", "trout", "eagle", "rabbit",
"dragonfly", "fox", "deer", "stream", "wolf"),
proportion = c(bear_count/(nrow(df)*20),
bee_count/(nrow(df)*20),
meadow_count/(nrow(df)*20),
trout_count/(nrow(df)*20),
eagle_count/(nrow(df)*20),
rabbit_count/(nrow(df)*20),
dragonfly_count/(nrow(df)*20),
fox_count/(nrow(df)*20),
deer_count/(nrow(df)*20),
stream_count/(nrow(df)*20),
wolf_count/(nrow(df)*20)),
true_prop = c(12/130,
8/130,
20/130,
10/130,
8/130,
8/130,
8/130,
12/130,
12/130,
20/130,
12/130)
)
return(df_prop)
}make_bars <- function(df, title){
ggplot(aes(x = reorder(name, -proportion), y = proportion, fill = reorder(name, -proportion)), data=df) +
geom_bar(stat = "identity") +
scale_fill_brewer(palette="PRGn", direction = -1) +
labs(x = "card", title=title) +
theme(legend.position = "none")
}exact_card_count <- function(Xcluster=NULL, cluster_num=NULL, database){
bear_exact <- rep(0, 13)
bee_exact <- rep(0, 9)
meadow_exact <- rep(0, 21)
trout_exact <- rep(0, 11)
eagle_exact <- rep(0, 9)
rabbit_exact <- rep(0, 9)
dragonfly_exact <- rep(0, 9)
fox_exact <- rep(0, 13)
deer_exact <- rep(0, 13)
stream_exact <- rep(0, 21)
wolf_exact <- rep(0, 13)
if(is.null(Xcluster) && is.null(cluster_num)){
df <- database %>%
filter(pool == "default")
}else{
df <- database %>%
filter(
case_when(
Xcluster == 7 ~ X7cluster == cluster_num,
Xcluster == 6 ~ X6cluster == cluster_num,
Xcluster == 5 ~ X5cluster == cluster_num,
Xcluster == 4 ~ X4cluster == cluster_num,
Xcluster == 3 ~ X3cluster == cluster_num
)
)
}
for(i in 1:nrow(df)){
bear_count <- 0
bee_count <- 0
meadow_count <- 0
trout_count <- 0
eagle_count <- 0
rabbit_count <- 0
dragonfly_count <- 0
fox_count <- 0
deer_count <- 0
stream_count <- 0
wolf_count <- 0
for(j in 2:21){
if(df[i,j] == "Bear"){
bear_count = bear_count + 1
}else if(df[i,j] == "Bee"){
bee_count = bee_count + 1
}else if(df[i,j] == "Meadow"){
meadow_count = meadow_count + 1
}else if(df[i,j] == "Trout"){
trout_count = trout_count + 1
}else if(df[i,j] == "Eagle"){
eagle_count = eagle_count + 1
}else if(df[i,j] == "Rabbit"){
rabbit_count = rabbit_count + 1
}else if(df[i,j] == "Dragonfly"){
dragonfly_count = dragonfly_count + 1
}else if(df[i,j] == "Fox"){
fox_count = fox_count + 1
}else if(df[i,j] == "Deer"){
deer_count = deer_count + 1
}else if(df[i,j] == "Stream"){
stream_count = stream_count + 1
}else if(df[i,j] == "Wolf"){
wolf_count = wolf_count + 1
}else{
print("bugged")
}
}
bear_exact[bear_count+1] = bear_exact[bear_count+1] + 1
bee_exact[bee_count+1] = bee_exact[bee_count+1] + 1
meadow_exact[meadow_count+1] = meadow_exact[meadow_count+1] + 1
trout_exact[trout_count+1] = trout_exact[trout_count+1] + 1
eagle_exact[eagle_count+1] = eagle_exact[eagle_count+1] + 1
rabbit_exact[rabbit_count+1] = rabbit_exact[rabbit_count+1] + 1
dragonfly_exact[dragonfly_count+1] = dragonfly_exact[dragonfly_count+1] + 1
fox_exact[fox_count+1] = fox_exact[fox_count+1] + 1
deer_exact[deer_count+1] = deer_exact[deer_count+1] + 1
stream_exact[stream_count+1] = stream_exact[stream_count+1] + 1
wolf_exact[wolf_count+1] = wolf_exact[wolf_count+1] + 1
}
bear_exact[8] = bear_exact[8]+bear_exact[9]+bear_exact[10]+
bear_exact[11]+bear_exact[12]+bear_exact[13]
bee_exact[8] = bee_exact[8]+bee_exact[9]
meadow_exact[8] = meadow_exact[8]+meadow_exact[9]+meadow_exact[10]+
meadow_exact[11]+meadow_exact[12]+meadow_exact[13]+meadow_exact[14]+
meadow_exact[15]+meadow_exact[16]+meadow_exact[17]+meadow_exact[18]+
meadow_exact[19]+meadow_exact[20]+meadow_exact[21]
trout_exact[8] = trout_exact[8]+trout_exact[9]+trout_exact[10]+
trout_exact[11]
eagle_exact[8] = eagle_exact[8]+eagle_exact[9]
rabbit_exact[8] = rabbit_exact[8]+rabbit_exact[9]
dragonfly_exact[8] = dragonfly_exact[8]+dragonfly_exact[9]
fox_exact[8] = fox_exact[8]+fox_exact[9]+fox_exact[10]+
fox_exact[11]+fox_exact[12]+fox_exact[13]
deer_exact[8] = deer_exact[8]+deer_exact[9]+deer_exact[10]+
deer_exact[11]+deer_exact[12]+deer_exact[13]
stream_exact[8] = stream_exact[8]+stream_exact[9]+stream_exact[10]+
stream_exact[11]+stream_exact[12]+stream_exact[13]+stream_exact[14]+
stream_exact[15]+stream_exact[16]+stream_exact[17]+stream_exact[18]+
stream_exact[19]+stream_exact[20]+stream_exact[21]
wolf_exact[8] = wolf_exact[8]+wolf_exact[9]+wolf_exact[10]+
wolf_exact[11]+wolf_exact[12]+wolf_exact[13]
bear_exact = bear_exact[1:8]
bee_exact = bee_exact[1:8]
meadow_exact = meadow_exact[1:8]
trout_exact = trout_exact[1:8]
eagle_exact = eagle_exact[1:8]
rabbit_exact = rabbit_exact[1:8]
dragonfly_exact = dragonfly_exact[1:8]
fox_exact = fox_exact[1:8]
deer_exact = deer_exact[1:8]
stream_exact = stream_exact[1:8]
wolf_exact = wolf_exact[1:8]
result <- data.frame(
card = c(rep("Bear", 8),
rep("Bee", 8),
rep("Meadow", 8),
rep("Trout", 8),
rep("Eagle", 8),
rep("Rabbit", 8),
rep("Dragonfly", 8),
rep("Fox", 8),
rep("Deer", 8),
rep("Stream", 8),
rep("Wolf", 8)
),
num_exact = c(seq(0,7),
seq(0,7),
seq(0,7),
seq(0,7),
seq(0,7),
seq(0,7),
seq(0,7),
seq(0,7),
seq(0,7),
seq(0,7),
seq(0,7)
),
proportion = c(bear_exact/(nrow(df)),
bee_exact/(nrow(df)),
meadow_exact/(nrow(df)),
trout_exact/(nrow(df)),
eagle_exact/(nrow(df)),
rabbit_exact/(nrow(df)),
dragonfly_exact/(nrow(df)),
fox_exact/(nrow(df)),
deer_exact/(nrow(df)),
stream_exact/(nrow(df)),
wolf_exact/(nrow(df))
)
)
return(result)
}make_exact_bars <- function(df, title){
ggplot(aes(x = factor(num_exact),
y = proportion,
fill = factor(num_exact)),
data =
(df %>%
filter(proportion != 0))
) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
facet_wrap(vars(card),scales = "free_x") +
labs(x="Exact number of cards",
fill="Exact number of cards",
title=title)
}nrow(database_new %>% filter(pool == "default"))[1] 30000
default_pool_props <- exact_card_count(database=database_new)ggplot(aes(x = factor(num_exact),
y = proportion,
fill = factor(num_exact)),
data =
(default_pool_props %>%
filter(proportion != 0))
) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
facet_wrap(vars(card)) +
labs(x="Exact number of cards",
fill="Exact number of cards",
title="Overall proportions of the default pool")Summary statistics for cluster 7
cluster_table(7, "Cluster_7", cluster_data) mean_score sd_score p25th median p75th count
1 72.38033 8.294532 66 71 77 3142
Proportion of each card type out of all the cards in the cluster
make_bars(cluster_prop(7, "Cluster_7", cluster_data), "7Cluster: Cluster_7")Distributions of the exact number of each card type out of all grids in the cluster
make_exact_bars(exact_card_count(7, "Cluster_7", cluster_data), title="7Cluster: Cluster_7")cluster_table(7, "Cluster_6", cluster_data) mean_score sd_score p25th median p75th count
1 73.23918 6.733394 68 73 78 5870
make_bars(cluster_prop(7, "Cluster_6", cluster_data), "7Cluster: Cluster_6")make_exact_bars(exact_card_count(7, "Cluster_6", cluster_data), title="7Cluster: Cluster_6")cluster_table(7, "Cluster_3", cluster_data) mean_score sd_score p25th median p75th count
1 70.8507 5.061589 67 71 75 6671
make_bars(cluster_prop(7, "Cluster_3", cluster_data), "7Cluster: Cluster_3")make_exact_bars(exact_card_count(7, "Cluster_3", cluster_data), title="7Cluster: Cluster_3")cluster_table(7, "Cluster_4", cluster_data) mean_score sd_score p25th median p75th count
1 69.97354 4.80268 66 70 73 5405
make_bars(cluster_prop(7, "Cluster_4", cluster_data), "7Cluster: Cluster_4")make_exact_bars(exact_card_count(7, "Cluster_4", cluster_data), title="7Cluster: Cluster_4")cluster_table(7, "Cluster_2", cluster_data) mean_score sd_score p25th median p75th count
1 67.85015 4.099993 65 67 71 4291
make_bars(cluster_prop(7, "Cluster_2", cluster_data), "7Cluster: Cluster_2")make_exact_bars(exact_card_count(7, "Cluster_2", cluster_data), title="7Cluster: Cluster_2")cluster_table(7, "Cluster_1", cluster_data) mean_score sd_score p25th median p75th count
1 68.84694 4.440481 65 68 72 4887
make_bars(cluster_prop(7, "Cluster_1", cluster_data), "7Cluster: Cluster_1")make_exact_bars(exact_card_count(7, "Cluster_1", cluster_data), title="7Cluster: Cluster_1")cluster_table(7, "Cluster_5", cluster_data) mean_score sd_score p25th median p75th count
1 67.80948 4.12117 64 67 71 5800
make_bars(cluster_prop(7, "Cluster_5", cluster_data), "7Cluster: Cluster_5")make_exact_bars(exact_card_count(7, "Cluster_5", cluster_data), title="7Cluster: Cluster_5")ggplot(aes(x=col, y=row, fill=proportion), data=(overall_position)) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(card)) +
labs(title="Positions across all 50,000")# all_cluster_prop <- rbind(cluster_prop(7, "Cluster_1", cluster_data) %>% mutate(cluster = "Cluster_1"),
# cluster_prop(7, "Cluster_2", cluster_data) %>% mutate(cluster = "Cluster_2"),
# cluster_prop(7, "Cluster_3", cluster_data) %>% mutate(cluster = "Cluster_3"),
# cluster_prop(7, "Cluster_4", cluster_data) %>% mutate(cluster = "Cluster_4"),
# cluster_prop(7, "Cluster_5", cluster_data) %>% mutate(cluster = "Cluster_5"),
# cluster_prop(7, "Cluster_6", cluster_data) %>% mutate(cluster = "Cluster_6"),
# cluster_prop(7, "Cluster_7", cluster_data) %>% mutate(cluster = "Cluster_7")
# )all_exact <- rbind(exact_card_count(7, "Cluster_1", cluster_data) %>% mutate(cluster = "Cluster_1"),
exact_card_count(7, "Cluster_2", cluster_data) %>% mutate(cluster = "Cluster_2"),
exact_card_count(7, "Cluster_3", cluster_data) %>% mutate(cluster = "Cluster_3"),
exact_card_count(7, "Cluster_4", cluster_data) %>% mutate(cluster = "Cluster_4"),
exact_card_count(7, "Cluster_5", cluster_data) %>% mutate(cluster = "Cluster_5"),
exact_card_count(7, "Cluster_6", cluster_data) %>% mutate(cluster = "Cluster_6"),
exact_card_count(7, "Cluster_7", cluster_data) %>% mutate(cluster = "Cluster_7")
)# write.csv(all_exact, here::here("all_exact.csv"), row.names = FALSE)# write.csv(all_cluster_prop, here::here("cluster_prop.csv"), row.names = FALSE)all_cluster_prop <- read.csv(here::here("cluster_prop.csv")) %>%
mutate(cluster = factor(cluster, levels = paste0("Cluster_", 1:7)))all_exact <- read.csv(here::here("all_exact.csv")) cluster_data %>%
group_by(X7cluster) %>%
summarize(mean_score=mean(score),
sd_score=sd(score),
p25th=quantile(score,probs=0.25),
median=quantile(score,probs=0.5),
p75th=quantile(score,probs=0.75),
count=n(),
.groups = 'drop') %>%
arrange(desc(mean_score))# A tibble: 7 × 7
X7cluster mean_score sd_score p25th median p75th count
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 Cluster_6 73.2 6.73 68 73 78 5870
2 Cluster_7 72.4 8.29 66 71 77 3142
3 Cluster_3 70.9 5.06 67 71 75 6671
4 Cluster_4 70.0 4.80 66 70 73 5405
5 Cluster_1 68.8 4.44 65 68 72 4887
6 Cluster_2 67.9 4.10 65 67 71 4291
7 Cluster_5 67.8 4.12 64 67 71 5800
ggplot(aes(x = reorder(name, -true_prop),
y = proportion,
fill = reorder(name, -true_prop)),
data=all_cluster_prop) +
geom_bar(stat = "identity") +
geom_point(aes(x = reorder(name, -true_prop), y= true_prop),
color = "red",
data=all_cluster_prop,
show.legend = FALSE) +
scale_fill_brewer(palette="PRGn", direction = -1) +
labs(x = "card", fill="card type", title="Propotion of card types in each cluster") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
facet_wrap(vars(cluster))ggplot(all_cluster_prop, aes(x = proportion, y = cluster, color = name)) +
geom_point(
size = 3,
alpa = 0.7,
position = position_jitter(height = 0.2, seed = 110)
) +
scale_color_brewer(palette = "Paired") +
labs(
x = "Proportion",
y = "Cluster",
color = "Card Type",
title = "Card Proportions by Cluster"
) +
theme_minimal() +
theme(
panel.grid.major.y = element_line(color = "grey90"),
panel.grid.minor = element_blank(),
legend.position = "right"
)ggplot(aes(x = cluster,
y = proportion,
fill = factor(num_exact, levels = rev(0:7))),
data = all_exact
) +
geom_bar(position = "fill", stat = "identity") +
scale_fill_viridis_d() +
facet_wrap(vars(card)) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(x="Clusters",
fill="Exact number of cards",
title="Exact number of cards in each cluster")ggplot(all_exact, aes(x = num_exact, y = proportion, color = cluster)) +
geom_line(size = 0.5) +
facet_wrap(~ card) +
scale_x_continuous(breaks = 0:7) +
labs(x = "Exact Number of Cards",
y = "Proportion",
color = "Cluster",
title = "Exact number of cards in each cluster") +
theme_minimal() +
theme(legend.position = "bottom",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.text = element_text(size = 10, face = "bold"),
panel.spacing = unit(1, "lines"))ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Bear"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Bear positions across clusters")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Bee"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Bee positions across clusters")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Meadow"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Meadow positions across clusters")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Trout"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Trout positions across clusters")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Eagle"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Eagle positions across clusters")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Rabbit"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Rabbit positions across clusters")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Dragonfly"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Dragonfly positions across clusters")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Fox"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Fox positions across clusters")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Deer"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Deer positions across clusters")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Stream"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Stream positions across clusters")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Wolf"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Wolf positions across clusters")db_pos = read_csv(here::here("database.csv"))neighbors = db_pos |>
filter(score > 60) |>
select(!pool) |>
pivot_longer(!c(ID, score), names_to = "position", values_to = "card") |>
mutate(row = substr(position, start = 4, stop = 4),
col = substr(position, start = 8, stop = 8),
row = as.numeric(row),
col = as.numeric(col))# neighboring card on right
neighbors = neighbors |>
left_join(neighbors |>
mutate(col = col - 1) |>
rename(right_neighbor = card) |>
select(ID, row, col, right_neighbor),
join_by(ID, row, col))
# neighboring card on left
neighbors = neighbors |>
left_join(neighbors |>
mutate(col = col + 1) |>
rename(left_neighbor = card) |>
select(ID, row, col, left_neighbor),
join_by(ID, row, col))
# neighboring card up
neighbors = neighbors |>
left_join(neighbors |>
mutate(row = row - 1) |>
rename(up_neighbor = card) |>
select(ID, row, col, up_neighbor),
join_by(ID, row, col))
# neighboring card down
neighbors = neighbors |>
left_join(neighbors |>
mutate(row = row + 1) |>
rename(down_neighbor = card) |>
select(ID, row, col, down_neighbor),
join_by(ID, row, col))neighbors |> head(10)# A tibble: 10 × 10
ID score position card row col right_neighbor left_neighbor
<dbl> <dbl> <chr> <chr> <dbl> <dbl> <chr> <chr>
1 1 67 row1col1 Meadow 1 1 Meadow <NA>
2 1 67 row1col2 Meadow 1 2 Deer Meadow
3 1 67 row1col3 Deer 1 3 Dragonfly Meadow
4 1 67 row1col4 Dragonfly 1 4 Fox Deer
5 1 67 row1col5 Fox 1 5 <NA> Dragonfly
6 1 67 row2col1 Meadow 2 1 Bee <NA>
7 1 67 row2col2 Bee 2 2 Bear Meadow
8 1 67 row2col3 Bear 2 3 Trout Bee
9 1 67 row2col4 Trout 2 4 Deer Bear
10 1 67 row2col5 Deer 2 5 <NA> Trout
# ℹ 2 more variables: up_neighbor <chr>, down_neighbor <chr>
neighbors_long = neighbors |>
pivot_longer(!c(ID, score, position, row, col, card),
names_to = "neighbor",
values_to = "neighbor_card")neighbors_long |> head(10)# A tibble: 10 × 8
ID score position card row col neighbor neighbor_card
<dbl> <dbl> <chr> <chr> <dbl> <dbl> <chr> <chr>
1 1 67 row1col1 Meadow 1 1 right_neighbor Meadow
2 1 67 row1col1 Meadow 1 1 left_neighbor <NA>
3 1 67 row1col1 Meadow 1 1 up_neighbor Meadow
4 1 67 row1col1 Meadow 1 1 down_neighbor <NA>
5 1 67 row1col2 Meadow 1 2 right_neighbor Deer
6 1 67 row1col2 Meadow 1 2 left_neighbor Meadow
7 1 67 row1col2 Meadow 1 2 up_neighbor Bee
8 1 67 row1col2 Meadow 1 2 down_neighbor <NA>
9 1 67 row1col3 Deer 1 3 right_neighbor Dragonfly
10 1 67 row1col3 Deer 1 3 left_neighbor Meadow
neighbors_sum = neighbors_long |>
filter(!is.na(neighbor_card)) |>
group_by(card) |>
count(neighbor_card) |>
mutate(proportion = n / sum(n))ggplot(neighbors_sum,
aes(x = card,
y = neighbor_card,
fill = proportion)) +
geom_tile() +
scale_fill_distiller(palette = "Greens", direction = 1)ggplot(neighbors_sum,
aes(x = card,
fill = neighbor_card,
y = proportion)) +
geom_bar(position = "fill", stat = "identity") +
scale_fill_viridis_d()ggplot(neighbors_sum,
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
facet_wrap(vars(card))db_cluster = read_csv(here::here("normalized_clusters.csv"))neighbors_cluster = db_cluster |>
pivot_longer(!c(ID, pool, score, contains("cluster")),
names_to = "position", values_to = "card") |>
mutate(row = substr(position, start = 4, stop = 4),
col = substr(position, start = 8, stop = 8),
row = as.numeric(row),
col = as.numeric(col))# neighboring card on right
neighbors_cluster = neighbors_cluster |>
left_join(neighbors_cluster |>
mutate(col = col - 1) |>
rename(right_neighbor = card) |>
select(ID, row, col, right_neighbor),
join_by(ID, row, col))
# neighboring card on left
neighbors_cluster = neighbors_cluster |>
left_join(neighbors_cluster |>
mutate(col = col + 1) |>
rename(left_neighbor = card) |>
select(ID, row, col, left_neighbor),
join_by(ID, row, col))
# neighboring card up
neighbors_cluster = neighbors_cluster |>
left_join(neighbors_cluster |>
mutate(row = row - 1) |>
rename(up_neighbor = card) |>
select(ID, row, col, up_neighbor),
join_by(ID, row, col))
# neighboring card down
neighbors_cluster = neighbors_cluster |>
left_join(neighbors_cluster |>
mutate(row = row + 1) |>
rename(down_neighbor = card) |>
select(ID, row, col, down_neighbor),
join_by(ID, row, col))neighbors_cluster |> head(10)# A tibble: 10 × 16
ID score pool `7cluster` `6cluster` `5cluster` `4cluster` `3cluster`
<dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
1 1 67 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
2 1 67 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
3 1 67 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
4 1 67 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
5 1 67 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
6 1 67 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
7 1 67 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
8 1 67 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
9 1 67 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
10 1 67 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
# ℹ 8 more variables: position <chr>, card <chr>, row <dbl>, col <dbl>,
# right_neighbor <chr>, left_neighbor <chr>, up_neighbor <chr>,
# down_neighbor <chr>
neighbors_cluster_long = neighbors_cluster |>
pivot_longer(!c(ID, pool, contains("cluster"), score, position, row, col, card),
names_to = "neighbor",
values_to = "neighbor_card")neighbors_cluster_long |> head(10)# A tibble: 10 × 14
ID score pool `7cluster` `6cluster` `5cluster` `4cluster` `3cluster`
<dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
1 1 67 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
2 1 67 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
3 1 67 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
4 1 67 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
5 1 67 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
6 1 67 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
7 1 67 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
8 1 67 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
9 1 67 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
10 1 67 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
# ℹ 6 more variables: position <chr>, card <chr>, row <dbl>, col <dbl>,
# neighbor <chr>, neighbor_card <chr>
neighbors_sum2 = neighbors_cluster_long |>
filter(!is.na(neighbor_card)) |>
group_by(card, `7cluster`) |>
count(neighbor_card) |>
mutate(proportion = n / sum(n))ggplot(neighbors_sum2,
aes(x = `7cluster`,
fill = neighbor_card,
y = proportion)) +
geom_bar(position = "fill", stat = "identity") +
scale_fill_viridis_d() +
facet_wrap(vars(card)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8))ggplot(neighbors_sum2,
aes(x = card,
y = neighbor_card,
fill = proportion)) +
geom_tile() +
scale_fill_distiller(palette = "Greens", direction = 1) +
facet_wrap(vars(`7cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8))ggplot(neighbors_sum2 %>% filter(card == "Bear"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`7cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Bear neighbors in each cluster")ggplot(neighbors_sum2 %>% filter(card == "Bee"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`7cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Bee neighbors in each cluster")ggplot(neighbors_sum2 %>% filter(card == "Meadow"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`7cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Meadow neighbors in each cluster")ggplot(neighbors_sum2 %>% filter(card == "Trout"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`7cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Trout neighbors in each cluster")ggplot(neighbors_sum2 %>% filter(card == "Eagle"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`7cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Eagle neighbors in each cluster")ggplot(neighbors_sum2 %>% filter(card == "Rabbit"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`7cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Rabbit neighbors in each cluster")ggplot(neighbors_sum2 %>% filter(card == "Dragonfly"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`7cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Dragonfly neighbors in each cluster")ggplot(neighbors_sum2 %>% filter(card == "Fox"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`7cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Fox neighbors in each cluster")ggplot(neighbors_sum2 %>% filter(card == "Deer"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`7cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Deer neighbors in each cluster")ggplot(neighbors_sum2 %>% filter(card == "Stream"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`7cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Stream neighbors in each cluster")ggplot(neighbors_sum2 %>% filter(card == "Wolf"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`7cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Wolf neighbors in each cluster")db_individual_removed <- read.csv(here::here("db_individual_lowest_removed.csv"))cluster_merge <- merge(x = cluster_data, y = db_individual_removed, by = "ID")ggplot(aes(x = score), data = cluster_merge) +
geom_histogram(binwidth = 5, fill = "green3") +
theme_minimal() +
facet_wrap(vars(X7cluster)) +
labs(title="Score distribution by cluster")ggplot(cluster_merge, aes(x = score, color = factor(X7cluster))) +
geom_density(size = 1.2) +
labs(x = "Score", y = "Density", color = "Cluster", title="Score distribution by cluster") +
theme_minimal()low_score = database_new %>% anti_join(cluster_merge, by = "ID")ggplot(cluster_merge, aes(x = score, color = factor(X7cluster))) +
geom_density(size = 1.2) +
geom_density(aes(x=score, color = "All grids"), size = 1.2, data=database_new) +
labs(x = "Score", y = "Density", color = "Cluster", title="Score distribution by cluster") +
theme_minimal()# write.csv(cluster_merge, here::here("cluster_merge.csv"), row.names = FALSE)# write.csv(neighbors_cluster_long, here::here("neighbor-cluster.csv"), row.names = FALSE)two_player <- list(c(37,4,2,2), c(45,3,3,3))
four_player <- list(c(37,5,1,2), c(45,2,4,3), c(60,0,0,0), c(47,4,3,4))
five_player <- list(c(37,3,0,2), c(45,2,4,3), c(60,3,0,0), c(47,2,0,4), c(38,1,4,4))mp_score(two_player)[1] 37 45
[1] 45 50
[1] 53 62
[1] 65 69
mp_score(four_player)[1] 37 45 60 47
[1] 45 45 60 52
[1] 49 57 60 60
[1] 61 60 72 63
mp_score(five_player)[1] 37 45 60 47 38
[1] 45 45 68 47 38
[1] 45 57 68 47 50
[1] 52 60 80 42 50
z <- score_grid(sample_grid)
z[1] 29 0 0 6
z2 <- score_grid(sample_grid2)
z2[1] 36 0 0 7
z3 <- score_grid(big_grid1)
z3[1] 46 3 1 0
set.seed(48)
test_grid <- generate_grid(cards)
test_grid [,1] [,2] [,3] [,4] [,5]
[1,] "Dragonfly" "Dragonfly" "Stream" "Meadow" "Fox"
[2,] "Bear" "Trout" "Meadow" "Fox" "Eagle"
[3,] "Trout" "Rabbit" "Stream" "Bee" "Stream"
[4,] "Wolf" "Rabbit" "Deer" "Meadow" "Bee"
x <- find_cardinals(0,3,test_grid)
x[[1]]
[1] 1 3
[[2]]
[1] 0 4
[[3]]
[1] 0 2
c <- sample(x, 1)
c[[1]]
[1] 1 3
"el"[1] "el"
c[[1]][1][1] 1
score_grid(test_grid)[1] 26 1 1 2
solo_score(score_grid(test_grid))[1] 40
set.seed(48)
baseline_scores <- baseline_sim(cards)mean(baseline_scores)[1] 29.0535
sd(baseline_scores)[1] 9.467661
var(baseline_scores)[1] 89.6366
max(baseline_scores)[1] 64
min(baseline_scores)[1] 2
summary(baseline_scores) Min. 1st Qu. Median Mean 3rd Qu. Max.
2.00 23.00 29.00 29.05 35.25 64.00
baseline_data <- data.frame(baseline_scores)
ggplot(aes(x = baseline_scores), data = baseline_data) +
geom_histogram(binwidth = 5, fill = "steelblue")